diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1093adb3d..9da15bccb 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -62,7 +62,7 @@ jobs: task: name: Data pipeline run: | - python scripts/prepare_memmap_dataset.py test_fixtures/*.json.gz -o /tmp/c4-sample.npy --validate + python scripts/prepare_memmap_dataset.py test_fixtures/*.json.gz -o /tmp/c4-sample.npy --validate --ack-deprecated steps: - uses: actions/checkout@v3 diff --git a/CHANGELOG.md b/CHANGELOG.md index e28a4c03b..42439f5de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added support for Grouped Query Attention. - Added commonsense_qa and social_iqa downstream evaluation tasks +- Added MMLU multiple choice (A/B/C/D) 5-shot variant downstream tasks ### Changed @@ -26,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Don't log garbage on nodes that aren't rank 0 - Don't crash in the HF code when we are referring to a tokenizer in a local file +- Fixed the size calculation for qk layer norm ## [v0.2.5](https://github.com/allenai/OLMo/releases/tag/v0.2.5) - 2024-03-06 @@ -35,11 +37,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added the option to directly pass input embeddings to `OLMo` and `OLMoForCausalLM`. - Added support for Python 3.8. - Added code to throw an error if `output_attentions` is set to `True` in forward call to `OLMoForCausalLM`. This functionality hasn't been implemented yet. -- Fixed running with data loading workers on LUMI +- Correct scheme displayed in error messages that come from R2 +- Fixed running with multiple data loading workers in LUMI - Minor bug fix: uninitialized prompts variable ### Added - Added `output_hidden_states` argument and associated functionality to `OLMo` and `OLMoForCausalLM` to return model intermediate hidden states. 
+- Ability to read from R2 like we read from S3 - Added MMLU downstream evaluation tasks, with prompt variations. - Added support for PyTorch v2.2. - Added ability to show logs from all ranks diff --git a/configs/mcli/olmo7-ablation-baseline.yaml b/configs/mcli/olmo7-ablation-baseline.yaml new file mode 100644 index 000000000..805138d98 --- /dev/null +++ b/configs/mcli/olmo7-ablation-baseline.yaml @@ -0,0 +1,47 @@ +name: olmo7-ablation-baseline # can't have "_" or "." here +image: mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04 +compute: + gpus: 64 + cluster: r7z2 + gpu_type: a100_40gb +integrations: + - integration_type: git_repo + git_repo: allenai/OLMo + git_branch: olmo7-ablations + #git_commit: d765e8819f5b0be204c96b0b519de2372b0da729 + pip_install: -e .[train] + ssh_clone: true +command: |- + pip freeze + mkdir -p /root/.cache/torch/ + + export OMP_NUM_THREADS=8 + export LOG_FILTER_TYPE=all_ranks + #export OLMO_NO_SSL=1 + + # warm up huggingface cache + pushd /root/.cache + curl "https://storage.googleapis.com/dirkgr-public/huggingface_cache.tar.gz" | tar -xzf - + popd + export HF_DATASETS_OFFLINE=1 + + cd OLMo + + torchrun \ + --master_addr $MASTER_ADDR \ + --master_port $MASTER_PORT \ + --nnodes $NUM_NODES \ + --node_rank $NODE_RANK \ + --nproc_per_node 8 \ + scripts/train.py configs/olmo7-ablation-baseline.yaml \ + --run_name=olmo7-ablation-baseline \ + --wandb.name=baseline \ + --model.flash_attention=true \ + --fsdp.wrapping_strategy=by_block_and_size \ + --fsdp.sharding_strategy=FULL_SHARD \ + --save_folder=runs/ \ + --activation_checkpointing=whole_layer \ + --device_train_microbatch_size=3 \ + --global_train_batch_size=6144 \ + --wandb.group=baseline3 \ + --remote_save_folder=s3://ai2-llm/checkpoints/olmo7-ablation/baseline3 diff --git a/configs/mcli/olmo7-ablation-dedupedocs.yaml b/configs/mcli/olmo7-ablation-dedupedocs.yaml new file mode 100644 index 000000000..ccd84be45 --- /dev/null +++ b/configs/mcli/olmo7-ablation-dedupedocs.yaml @@ -0,0 
+1,46 @@ +name: olmo7-ablation-dedupedocs # can't have "_" or "." here +image: mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04 +compute: + gpus: 64 + cluster: r14z3p2 + gpu_type: h100_80gb +integrations: + - integration_type: git_repo + git_repo: allenai/OLMo + git_branch: olmo7-ablations + #git_commit: d765e8819f5b0be204c96b0b519de2372b0da729 + pip_install: -e .[train] + ssh_clone: true +command: |- + pip freeze + mkdir -p /root/.cache/torch/ + + export OMP_NUM_THREADS=8 + export LOG_FILTER_TYPE=all_ranks + #export OLMO_NO_SSL=1 + + # warm up huggingface cache + pushd /root/.cache + curl "https://storage.googleapis.com/dirkgr-public/huggingface_cache.tar.gz" | tar -xzf - + popd + export HF_DATASETS_OFFLINE=1 + + cd OLMo + + torchrun \ + --master_addr $MASTER_ADDR \ + --master_port $MASTER_PORT \ + --nnodes $NUM_NODES \ + --node_rank $NODE_RANK \ + --nproc_per_node 8 \ + scripts/train.py configs/olmo7-ablation-dedupedocs.yaml \ + --run_name=olmo7-ablation-dedupedocs \ + --wandb.name=dedupedocs \ + --model.flash_attention=true \ + --fsdp.wrapping_strategy=by_block_and_size \ + --fsdp.sharding_strategy=SHARD_GRAD_OP \ + --save_folder=runs/ \ + --device_train_microbatch_size=3 \ + --global_train_batch_size=6144 \ + --wandb.group=dedupedocs \ + --remote_save_folder=s3://ai2-llm/checkpoints/olmo7-ablation/dedupedocs diff --git a/configs/mcli/olmo7-ablation-dolma17.yaml b/configs/mcli/olmo7-ablation-dolma17.yaml new file mode 100644 index 000000000..30c3b70ec --- /dev/null +++ b/configs/mcli/olmo7-ablation-dolma17.yaml @@ -0,0 +1,47 @@ +name: olmo7-ablation-dolma17 # can't have "_" or "." 
here +image: mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04 +compute: + gpus: 128 + cluster: r12z3 + gpu_type: a100_40gb +integrations: + - integration_type: git_repo + git_repo: allenai/OLMo + git_branch: olmo7-ablations + #git_commit: d765e8819f5b0be204c96b0b519de2372b0da729 + pip_install: -e .[train] + ssh_clone: true +command: |- + pip freeze + mkdir -p /root/.cache/torch/ + + export OMP_NUM_THREADS=8 + export LOG_FILTER_TYPE=all_ranks + #export OLMO_NO_SSL=1 + + # warm up huggingface cache + pushd /root/.cache + curl "https://storage.googleapis.com/dirkgr-public/huggingface_cache.tar.gz" | tar -xzf - + popd + export HF_DATASETS_OFFLINE=1 + + cd OLMo + + torchrun \ + --master_addr $MASTER_ADDR \ + --master_port $MASTER_PORT \ + --nnodes $NUM_NODES \ + --node_rank $NODE_RANK \ + --nproc_per_node 8 \ + scripts/train.py configs/olmo7-ablation-dolma17.yaml \ + --run_name=olmo7-ablation-dolma17 \ + --wandb.name=dolma17 \ + --model.flash_attention=true \ + --fsdp.wrapping_strategy=by_block_and_size \ + --fsdp.sharding_strategy=FULL_SHARD \ + --save_folder=runs/ \ + --activation_checkpointing=whole_layer \ + --device_train_microbatch_size=3 \ + --global_train_batch_size=6144 \ + --wandb.group=dolma17 \ + --remote_save_folder=s3://ai2-llm/checkpoints/olmo7-ablation/dolma17 diff --git a/configs/olmo7-ablation-baseline.yaml b/configs/olmo7-ablation-baseline.yaml new file mode 100644 index 000000000..bfe40e48b --- /dev/null +++ b/configs/olmo7-ablation-baseline.yaml @@ -0,0 +1,640 @@ +run_name: olmo7-ablation +seed: 61394 +dry_run: false + +wandb: + name: ${run_name} + project: olmo7-ablations + group: olmo7-ablation + +model: + d_model: 4096 + n_heads: 32 + n_layers: 32 + # mlp_ratio: 6 + mlp_hidden_size: 22016 + weight_tying: false + alibi: false + rope: true + flash_attention: true + attention_dropout: 0.0 + attention_layer_norm: false + multi_query_attention: false + include_bias: false + block_type: sequential + layer_norm_type: default + layer_norm_with_affine: 
false + bias_for_layer_norm: false + attention_layer_norm_with_affine: false + activation_type: swiglu + residual_dropout: 0.0 + embedding_dropout: 0.0 + max_sequence_length: 2048 + vocab_size: 50280 + embedding_size: 50304 + eos_token_id: 0 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + +compile: null + +optimizer: + name: adamw + learning_rate: 1.5e-4 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + metrics_log_interval: 10 + +scheduler: + name: linear_with_warmup + t_warmup: 1000 + alpha_f: 0.1 + +tokenizer: + identifier: tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json + truncate_direction: right + +save_folder: ${oc.env:CHECKPOINTS_PATH}/${oc.env:SLURM_JOB_ID,${run_name}} +save_overwrite: false +# Sharded checkpoints (best for restarts) +save_interval: 200 +save_num_checkpoints_to_keep: -1 +# Unsharded checkpoints (for final storage) +save_interval_unsharded: null +save_num_unsharded_checkpoints_to_keep: -1 + +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/hrshlkzq/step119000-unsharded/ # 0.5T +load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/j18wauyq/step238000-unsharded/ # 1.0T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/4xel5n7e/step358000-unsharded/ # 1.5T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/xtruaap8/step477000-unsharded/ # 2.0T + +no_pre_train_checkpoint: true +reset_optimizer_state: true +reset_trainer_state: true + +max_duration: 100e9T +global_train_batch_size: 2048 +device_train_microbatch_size: 2 +time_limit: null + +precision: amp_bf16 + +fsdp: + wrapping_strategy: by_block_and_size + precision: mixed + +max_grad_norm: 1.0 +max_grad_norm_ratio: null + +speed_monitor: + window_size: 20 + +eval_interval: ${save_interval} +eval_subset_num_batches: -1 +device_eval_batch_size: ${device_train_microbatch_size} +evaluators: + - label: all-small-ppl-validation + data: + num_workers: 0 + drop_last: true + # pin_memory: true + # prefetch_factor: 1 + # persistent_workers: false + # timeout: 0 + 
datasets: + 4chan-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/4chan/val.npy + c4_100_domains-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_100_domains/val.npy + c4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_en/val.npy + gab-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/gab/val.npy + ice-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ice/val.npy + m2d2_s2orc-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_s2orc/val.npy + m2d2_wiki-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_wiki/val.npy + manosphere-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/manosphere/val.npy + mc4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/mc4_en/val.npy + pile-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/pile/val.npy + ptb-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ptb/val.npy + twitterAEE-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/twitterAEE/val.npy + wikitext_103-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/wikitext_103/val.npy + + ########################## + # Downstream evaluations # + ########################## + - label: piqa + type: downstream + + - label: hellaswag + type: downstream + + - label: winogrande + type: downstream + + - label: openbook_qa + type: downstream + + - label: boolq + type: downstream + + - label: sciq + type: downstream + + - label: arc_easy + type: downstream + + - label: arc_challenge + type: downstream + + - label: mmlu_stem + type: downstream + + - label: mmlu_humanities + type: downstream + + - label: mmlu_social_sciences + type: downstream + + - label: mmlu_other + type: downstream + + #- label: copa + # type: downstream + + #- label: rte + # type: downstream + + #- label: commitment_bank + # type: downstream + + 
#- label: mrpc + # type: downstream + + #- label: sst2 + # type: downstream + +data: + pad_direction: right + num_workers: 16 + drop_last: true + pin_memory: true + prefetch_factor: 1 + persistent_workers: true + timeout: 0 + paths: + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-000-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-000-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-001-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-001-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-002-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-002-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-003-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-004-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-004-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-005-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-005-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-006-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-006-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-006-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-007-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-008-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-008-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-008-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-009-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-009-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-010-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-010-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-010-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-011-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-011-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-012-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-013-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-013-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-013-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-014-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-014-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-014-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-015-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-016-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-016-00001.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-017-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-017-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-018-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-018-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-019-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-019-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-020-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-020-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-021-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-022-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-022-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-023-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-023-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-024-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-024-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-025-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-025-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-026-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-026-00001.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-027-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-027-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-027-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-028-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-028-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-028-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-029-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-029-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-030-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-030-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-031-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-031-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-032-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-033-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-033-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-033-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-034-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-034-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-034-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-035-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-035-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-036-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-036-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-037-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-037-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-038-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-038-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-039-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-039-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-040-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-040-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-041-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-041-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-042-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-042-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-042-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-043-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-043-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-043-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-044-00001.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-044-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-045-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-045-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-046-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-046-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-046-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-046-00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-047-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-047-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-048-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-049-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-049-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-050-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-050-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-051-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-051-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-052-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-052-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-052-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-053-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-053-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-053-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-054-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-054-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-055-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-055-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-055-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-056-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-056-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-056-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-057-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-057-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-057-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-058-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-058-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-059-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-059-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-060-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-060-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-061-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-061-00001.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-062-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-062-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-062-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-063-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-063-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-063-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-064-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-064-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-064-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-065-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-065-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-065-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-066-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-066-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-067-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-067-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-068-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-068-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-069-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-069-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-070-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-070-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-071-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-071-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-072-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-072-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-073-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-073-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-074-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-074-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-075-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-075-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-076-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-076-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-077-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-077-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-078-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-078-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-079-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-079-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-080-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-080-00001.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-081-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-081-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-082-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-082-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-083-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-083-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-084-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-084-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-085-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-085-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-086-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-086-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-087-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-087-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-088-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-088-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-089-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-089-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-089-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-090-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-090-00001.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-091-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-091-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-091-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-092-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-092-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-093-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-093-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-093-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-094-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-094-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-094-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-095-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-095-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-096-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-096-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-097-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-097-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-097-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-098-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-098-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-099-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-099-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-100-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-100-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-100-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-101-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-101-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-102-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-102-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-103-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-103-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-104-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-104-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-105-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-106-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-106-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-106-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-107-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-107-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-108-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-108-00001.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-109-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-109-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-109-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-110-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-110-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-110-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-111-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-111-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-112-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-112-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-113-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-113-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-114-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-114-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-114-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-115-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-115-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-116-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-116-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-117-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-117-00001.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-118-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-118-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-119-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-119-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-120-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-120-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-120-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-121-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-121-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-122-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-122-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-122-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-123-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-123-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-123-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-124-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-124-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-125-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-125-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-126-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-126-00001.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-127-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-127-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-128-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-128-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-129-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-129-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-129-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-130-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-130-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-131-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-131-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-132-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-132-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-133-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-133-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-133-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-134-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-134-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-134-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-135-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-135-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-135-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-136-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-136-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-137-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-137-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-137-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-138-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-138-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-139-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-139-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-140-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-140-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-141-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-141-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-141-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-142-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-142-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-142-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-143-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-143-00001.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-144-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-144-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-144-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-145-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-145-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-145-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-146-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-146-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-146-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-147-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-147-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-147-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-148-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-148-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-149-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-149-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-149-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-150-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-150-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-150-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-150-00003.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-151-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-151-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-152-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-152-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-153-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-153-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-154-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-154-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-155-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-155-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-155-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-156-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-156-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-157-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-157-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-157-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-158-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-158-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-159-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-159-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-160-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-160-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-161-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-161-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-161-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-162-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-162-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-163-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-163-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-164-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-164-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-165-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-165-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-165-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-166-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-166-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-166-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-167-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-167-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-167-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-168-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-168-00001.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-169-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-169-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-170-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-170-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-171-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-171-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-172-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-172-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-173-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-173-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-173-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-174-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-174-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-174-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-175-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-175-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-175-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-176-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-176-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-176-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-177-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-177-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-178-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-178-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-179-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-179-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-180-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-180-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-181-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-181-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-182-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-182-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-182-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-183-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-183-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-183-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-184-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-184-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-185-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-185-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-185-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-186-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-186-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-186-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-187-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-187-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-187-00002.npy diff --git a/configs/olmo7-ablation-dedupedocs.yaml b/configs/olmo7-ablation-dedupedocs.yaml new file mode 100644 index 000000000..6cd75f2b6 --- /dev/null +++ b/configs/olmo7-ablation-dedupedocs.yaml @@ -0,0 +1,1618 @@ +run_name: olmo7-ablation-dedupedocs +seed: 61394 +dry_run: false + +wandb: + name: ${run_name} + project: olmo7-ablations + group: olmo7-ablation-dedupedocs + +model: + d_model: 4096 + n_heads: 32 + n_layers: 32 + # mlp_ratio: 6 + mlp_hidden_size: 22016 + weight_tying: false + alibi: false + rope: true + flash_attention: true + attention_dropout: 0.0 + attention_layer_norm: false + multi_query_attention: false + include_bias: false + block_type: sequential + layer_norm_type: default + layer_norm_with_affine: false + bias_for_layer_norm: false + attention_layer_norm_with_affine: false + activation_type: swiglu + residual_dropout: 0.0 + embedding_dropout: 0.0 + max_sequence_length: 2048 + vocab_size: 50280 + embedding_size: 50304 + eos_token_id: 0 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + +compile: null + +optimizer: + name: adamw + learning_rate: 1.5e-4 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + metrics_log_interval: 10 + +scheduler: + name: linear_with_warmup + t_warmup: 1000 + alpha_f: 0.1 + +tokenizer: + identifier: tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json + truncate_direction: right + +save_folder: ${oc.env:CHECKPOINTS_PATH}/${oc.env:SLURM_JOB_ID,${run_name}} +save_overwrite: false +# Sharded checkpoints (best for restarts) +save_interval: 
200 +save_num_checkpoints_to_keep: -1 +# Unsharded checkpoints (for final storage) +save_interval_unsharded: null +save_num_unsharded_checkpoints_to_keep: -1 + +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/hrshlkzq/step119000-unsharded/ # 0.5T +load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/j18wauyq/step238000-unsharded/ # 1.0T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/4xel5n7e/step358000-unsharded/ # 1.5T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/xtruaap8/step477000-unsharded/ # 2.0T + +no_pre_train_checkpoint: true +reset_optimizer_state: true +reset_trainer_state: true + +max_duration: 100e9T +global_train_batch_size: 2048 +device_train_microbatch_size: 2 +time_limit: null + +precision: amp_bf16 + +fsdp: + wrapping_strategy: by_block_and_size + precision: mixed + +max_grad_norm: 1.0 +max_grad_norm_ratio: null + +speed_monitor: + window_size: 20 + +eval_interval: ${save_interval} +eval_subset_num_batches: -1 +device_eval_batch_size: ${device_train_microbatch_size} +evaluators: + - label: all-small-ppl-validation + data: + num_workers: 0 + drop_last: true + # pin_memory: true + # prefetch_factor: 1 + # persistent_workers: false + # timeout: 0 + datasets: + 4chan-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/4chan/val.npy + c4_100_domains-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_100_domains/val.npy + c4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_en/val.npy + gab-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/gab/val.npy + ice-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ice/val.npy + m2d2_s2orc-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_s2orc/val.npy + m2d2_wiki-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_wiki/val.npy + manosphere-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/manosphere/val.npy + 
mc4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/mc4_en/val.npy + pile-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/pile/val.npy + ptb-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ptb/val.npy + twitterAEE-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/twitterAEE/val.npy + wikitext_103-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/wikitext_103/val.npy + + ########################## + # Downstream evaluations # + ########################## + - label: piqa + type: downstream + + - label: hellaswag + type: downstream + + - label: winogrande + type: downstream + + - label: openbook_qa + type: downstream + + - label: boolq + type: downstream + + - label: sciq + type: downstream + + - label: arc_easy + type: downstream + + - label: arc_challenge + type: downstream + + - label: mmlu_stem + type: downstream + + - label: mmlu_humanities + type: downstream + + - label: mmlu_social_sciences + type: downstream + + - label: mmlu_other + type: downstream + + - label: mmlu_stem_var + type: downstream + + - label: mmlu_humanities_var + type: downstream + + - label: mmlu_social_sciences_var + type: downstream + + - label: mmlu_other_var + type: downstream + + #- label: copa + # type: downstream + + #- label: rte + # type: downstream + + #- label: commitment_bank + # type: downstream + + #- label: mrpc + # type: downstream + + #- label: sst2 + # type: downstream + +data: + pad_direction: right + num_workers: 16 + drop_last: true + pin_memory: true + prefetch_factor: 1 + persistent_workers: true + timeout: 0 + paths: + ######### NON WEB DATA ######### + # ~> GUTENBERG BOOKS (5.256 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + # ~> PES2O STEM PAPERS (57.21 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> WIKIPEDIA & WIKIBOOKS (3.689 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + # ~> REDPAJAMA STACKEXCHANGE (19.63 GT) + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> REDPAJAMA ARXIV (19.63 GT) -- FIXME(review): the paths below repeat the STACKEXCHANGE shards listed above (same prefix, same token count); confirm the intended arXiv paths and size + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> PROOFPILE2 ALGEBRAIC STACK (12.623 GT) + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + # ~> PROOFPILE2 OPENWEBMATH (12.734 GT) + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + # ~> TULU FLAN V0 (1.84 GT) + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy + - 
s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy + #################################### + ######### CODE ######### + # ~> STARCODER (263.775 GT) + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00001.npy + #################################### + ######### WEB HIGH QUALITY ######### + # ~> C4 (157.2 GT) + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-66-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-67-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-68-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-69-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-70-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-71-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-72-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-73-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-74-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-75-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-76-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-77-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-78-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-79-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-80-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-81-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-82-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-83-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-84-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-85-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-86-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-87-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-88-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-89-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-90-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-91-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-92-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-93-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-94-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-95-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_doc_le030/gpt-neox-olmo-dolma-v1_5/part-96-00000.npy + # ~> REDDIT (79.988 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-66-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-67-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-68-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-69-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-70-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-71-00000.npy + # ~> FALCON (547.341 GT) + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-007-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-007-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-224-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-224-00001.npy + #################################### + ######### WEB REST ######### + 
# ~> DOLMA CC HEAD 50% (187.2 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-001-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-004-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-005-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-017-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-019-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-022-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-023-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-045-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-060-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-065-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-067-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-072-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-074-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-075-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-076-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-080-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-081-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-082-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-085-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-087-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-088-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-091-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-095-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-096-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-098-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-099-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-102-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-104-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-111-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-112-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-115-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-118-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-119-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-121-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-123-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-128-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-130-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-133-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-137-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-138-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-141-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-143-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-145-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-149-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-150-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-153-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-154-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-156-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-158-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-161-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-165-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-168-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-174-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-175-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-176-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-178-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-180-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-182-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-183-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-184-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-186-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy + # ~> DOLMA CC MIDDLE 33% (242.05 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-000-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-004-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-006-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-011-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-019-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-024-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-026-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-031-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-034-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-036-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-039-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-045-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-047-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-064-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-066-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-070-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-071-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-072-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-074-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-075-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-080-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-083-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-089-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-090-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-092-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-093-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-096-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-098-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-099-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-100-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-115-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-117-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-121-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-123-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-131-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-138-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-143-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-147-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-148-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-150-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-151-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-152-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-154-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-155-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-157-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-159-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-161-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-162-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-163-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-164-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-170-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-171-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-172-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-175-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-179-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-180-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-181-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-184-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-186-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-188-00000.npy + # ~> DOLMA CC TAIL 33% (268.05 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-000-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-002-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-004-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-016-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-019-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-022-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-023-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-026-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-027-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-036-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-037-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-039-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-066-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-068-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-071-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-072-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-074-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-076-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-077-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-081-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-082-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-083-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-086-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-088-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-090-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-091-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-092-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-093-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-098-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-101-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-104-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-115-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-116-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-121-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-123-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-125-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-129-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-133-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-137-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-140-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-148-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-149-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-153-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-156-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-157-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-159-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-161-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-162-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-164-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-165-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-171-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-172-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-173-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-174-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-176-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-178-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-181-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-182-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-183-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-184-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-185-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_doc_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy \ No newline at end of file diff --git a/configs/olmo7-ablation-dedupeparas.yaml b/configs/olmo7-ablation-dedupeparas.yaml new file mode 100644 index 000000000..663c91a41 --- /dev/null +++ b/configs/olmo7-ablation-dedupeparas.yaml @@ -0,0 +1,1625 @@ +run_name: olmo7-ablation-dedupeparas +seed: 61394 +dry_run: false + +wandb: + name: ${run_name} + project: olmo7-ablations + group: olmo7-ablation-dedupeparas + +model: + d_model: 4096 + n_heads: 32 + n_layers: 32 + # mlp_ratio: 6
+ mlp_hidden_size: 22016 + weight_tying: false + alibi: false + rope: true + flash_attention: true + attention_dropout: 0.0 + attention_layer_norm: false + multi_query_attention: false + include_bias: false + block_type: sequential + layer_norm_type: default + layer_norm_with_affine: false + bias_for_layer_norm: false + attention_layer_norm_with_affine: false + activation_type: swiglu + residual_dropout: 0.0 + embedding_dropout: 0.0 + max_sequence_length: 2048 + vocab_size: 50280 + embedding_size: 50304 + eos_token_id: 0 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + +compile: null + +optimizer: + name: adamw + learning_rate: 1.5e-4 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + metrics_log_interval: 10 + +scheduler: + name: linear_with_warmup + t_warmup: 1000 + alpha_f: 0.1 + +tokenizer: + identifier: tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json + truncate_direction: right + +save_folder: ${oc.env:CHECKPOINTS_PATH}/${oc.env:SLURM_JOB_ID,${run_name}} +save_overwrite: false +# Sharded checkpoints (best for restarts) +save_interval: 200 +save_num_checkpoints_to_keep: -1 +# Unsharded checkpoints (for final storage) +save_interval_unsharded: null +save_num_unsharded_checkpoints_to_keep: -1 + +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/hrshlkzq/step119000-unsharded/ # 0.5T +load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/j18wauyq/step238000-unsharded/ # 1.0T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/4xel5n7e/step358000-unsharded/ # 1.5T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/xtruaap8/step477000-unsharded/ # 2.0T + +no_pre_train_checkpoint: true +reset_optimizer_state: true +reset_trainer_state: true + +max_duration: 100e9T +global_train_batch_size: 2048 +device_train_microbatch_size: 2 +time_limit: null + +precision: amp_bf16 + +fsdp: + wrapping_strategy: by_block_and_size + precision: mixed + +max_grad_norm: 1.0 +max_grad_norm_ratio: null + +speed_monitor: + window_size: 20 + +eval_interval: 
${save_interval} +eval_subset_num_batches: -1 +device_eval_batch_size: ${device_train_microbatch_size} +evaluators: + - label: all-small-ppl-validation + data: + num_workers: 0 + drop_last: true + # pin_memory: true + # prefetch_factor: 1 + # persistent_workers: false + # timeout: 0 + datasets: + 4chan-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/4chan/val.npy + c4_100_domains-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_100_domains/val.npy + c4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_en/val.npy + gab-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/gab/val.npy + ice-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ice/val.npy + m2d2_s2orc-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_s2orc/val.npy + m2d2_wiki-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_wiki/val.npy + manosphere-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/manosphere/val.npy + mc4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/mc4_en/val.npy + pile-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/pile/val.npy + ptb-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ptb/val.npy + twitterAEE-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/twitterAEE/val.npy + wikitext_103-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/wikitext_103/val.npy + + ########################## + # Downstream evaluations # + ########################## + - label: piqa + type: downstream + + - label: hellaswag + type: downstream + + - label: winogrande + type: downstream + + - label: openbook_qa + type: downstream + + - label: boolq + type: downstream + + - label: sciq + type: downstream + + - label: arc_easy + type: downstream + + - label: arc_challenge + type: downstream + + - label: mmlu_stem + type: 
downstream + + - label: mmlu_humanities + type: downstream + + - label: mmlu_social_sciences + type: downstream + + - label: mmlu_other + type: downstream + + - label: mmlu_stem_var + type: downstream + + - label: mmlu_humanities_var + type: downstream + + - label: mmlu_social_sciences_var + type: downstream + + - label: mmlu_other_var + type: downstream + + #- label: copa + # type: downstream + + #- label: rte + # type: downstream + + #- label: commitment_bank + # type: downstream + + #- label: mrpc + # type: downstream + + #- label: sst2 + # type: downstream + +data: + pad_direction: right + num_workers: 16 + drop_last: true + pin_memory: true + prefetch_factor: 1 + persistent_workers: true + timeout: 0 + paths: + ######### NON WEB DATA ######### + # ~> GUTENBERG BOOKS (5.256 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + # ~> PES2O STEM PAPERS (57.21 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> WIKIPEDIA & WIKIBOOKS (3.689 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + # ~> REDPAJAMA STACKEXCHANGE (19.63 GT) + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> REDPAJAMA ARXIV (19.63 GT) -- NOTE(review): the entries below repeat the redpajama_stackexchange_only paths and token count from the STACKEXCHANGE section above; confirm the intended arXiv dataset paths + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + -
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> PROOFPILE2 ALGEBRAIC STACK (12.623 GT) + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + # ~> PROOFPILE2 OPENWEBMATH (12.734 GT) + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + # ~> TULU FLAN V0 (1.84 GT) + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy + #################################### + ######### CODE ######### + # ~> STARCODER (263.775 GT) + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00001.npy + 
#################################### + ######### WEB HIGH QUALITY ######### + # ~> C4 (157.2 GT) + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-66-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-67-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-68-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-69-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-70-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-71-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-72-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-73-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-74-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-75-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-76-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-77-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-78-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-79-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-80-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-81-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-82-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-83-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-84-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-85-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-86-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-87-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-88-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-89-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-90-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-91-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-92-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-93-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-94-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-95-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-96-00000.npy + # ~> REDDIT (79.988 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-66-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-67-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-68-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-69-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-70-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-71-00000.npy + # ~> FALCON (547.341 GT) + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-007-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-007-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-224-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-224-00001.npy + #################################### + ######### WEB REST ######### + # ~> DOLMA CC HEAD 50% (187.2 GT) + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-000-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-011-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-016-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-017-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-024-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-028-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-036-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-038-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-039-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-040-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-042-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-045-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-060-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-066-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-070-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-076-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-080-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-081-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-082-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-085-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-087-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-088-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-089-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-092-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-093-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-094-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-095-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-099-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-100-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-101-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-106-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-110-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-121-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-124-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-125-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-129-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-130-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-131-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-132-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-140-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-141-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-146-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-148-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-152-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-153-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-155-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-159-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-162-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-163-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-166-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-170-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-171-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-178-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-181-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-182-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-183-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-184-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-185-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy + # ~> DOLMA CC MIDDLE 33% (242.05 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-005-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-006-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-023-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-026-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-027-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-028-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-034-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-037-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-064-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-066-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-074-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-077-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-079-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-080-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-088-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-090-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-091-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-093-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-094-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-095-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-096-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-097-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-098-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-099-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-100-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-101-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-110-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-112-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-116-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-118-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-119-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-121-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-123-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-124-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-128-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-130-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-133-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-138-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-145-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-146-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-148-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-149-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-150-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-151-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-152-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-158-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-161-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-164-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-166-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-167-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-168-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-169-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-170-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-171-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-173-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-174-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-180-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-185-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-186-00000.npy + # ~> DOLMA CC TAIL 33% (268.05 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-001-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-005-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-006-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-009-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-017-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-022-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-024-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-026-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-031-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-034-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-042-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-054-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-058-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-065-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-072-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-075-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-076-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-077-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-078-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-080-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-081-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-085-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-086-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-091-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-092-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-096-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-097-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-100-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-102-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-104-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-106-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-110-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-112-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-114-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-115-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-117-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-120-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-128-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-131-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-137-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-138-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-145-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-146-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-147-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-150-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-151-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-153-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-154-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-162-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-165-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-166-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-167-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-169-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-170-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-172-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-173-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-174-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-175-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-179-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-180-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-181-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-182-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-184-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy \ No newline at end of file diff --git a/configs/olmo7-ablation-dolma17.yaml b/configs/olmo7-ablation-dolma17.yaml new file mode 100644 index 000000000..8ae01387d --- /dev/null +++ b/configs/olmo7-ablation-dolma17.yaml @@ -0,0 +1,1491 @@ +run_name: olmo7-ablation-dolma17 +seed: 61394 +dry_run: false + +wandb: + name: ${run_name} + project: olmo7-ablations + group: olmo7-ablation-dolma17 + +model: + d_model: 4096 + n_heads: 32 + n_layers: 32 + # mlp_ratio: 6 + mlp_hidden_size: 22016 + weight_tying: false + alibi: false + rope: true + flash_attention: true + attention_dropout: 0.0 + attention_layer_norm: false + multi_query_attention: false + include_bias: false + block_type: sequential + layer_norm_type: default + layer_norm_with_affine: false + bias_for_layer_norm: false + attention_layer_norm_with_affine: false + activation_type: swiglu + residual_dropout: 0.0 + embedding_dropout: 0.0 + max_sequence_length: 2048 + vocab_size: 50280 + embedding_size: 50304 + eos_token_id: 0 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + +compile: null + +optimizer: + name: adamw + learning_rate: 1.5e-4 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + metrics_log_interval: 10 + +scheduler: + name: linear_with_warmup + t_warmup: 1000 + alpha_f: 0.1 + +tokenizer: + identifier: 
tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json + truncate_direction: right + +save_folder: ${oc.env:CHECKPOINTS_PATH}/${oc.env:SLURM_JOB_ID,${run_name}} +save_overwrite: false +# Sharded checkpoints (best for restarts) +save_interval: 200 +save_num_checkpoints_to_keep: -1 +# Unsharded checkpoints (for final storage) +save_interval_unsharded: null +save_num_unsharded_checkpoints_to_keep: -1 + +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/hrshlkzq/step119000-unsharded/ # 0.5T +load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/j18wauyq/step238000-unsharded/ # 1.0T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/4xel5n7e/step358000-unsharded/ # 1.5T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/xtruaap8/step477000-unsharded/ # 2.0T + +no_pre_train_checkpoint: true +reset_optimizer_state: true +reset_trainer_state: true + +max_duration: 100e9T +global_train_batch_size: 2048 +device_train_microbatch_size: 2 +time_limit: null + +precision: amp_bf16 + +fsdp: + wrapping_strategy: by_block_and_size + precision: mixed + +max_grad_norm: 1.0 +max_grad_norm_ratio: null + +speed_monitor: + window_size: 20 + +eval_interval: ${save_interval} +eval_subset_num_batches: -1 +device_eval_batch_size: ${device_train_microbatch_size} +evaluators: + - label: all-small-ppl-validation + data: + num_workers: 0 + drop_last: true + # pin_memory: true + # prefetch_factor: 1 + # persistent_workers: false + # timeout: 0 + datasets: + 4chan-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/4chan/val.npy + c4_100_domains-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_100_domains/val.npy + c4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_en/val.npy + gab-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/gab/val.npy + ice-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ice/val.npy + m2d2_s2orc-validation: + - 
s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_s2orc/val.npy + m2d2_wiki-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_wiki/val.npy + manosphere-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/manosphere/val.npy + mc4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/mc4_en/val.npy + pile-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/pile/val.npy + ptb-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ptb/val.npy + twitterAEE-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/twitterAEE/val.npy + wikitext_103-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/wikitext_103/val.npy + + ########################## + # Downstream evaluations # + ########################## + - label: piqa + type: downstream + + - label: hellaswag + type: downstream + + - label: winogrande + type: downstream + + - label: openbook_qa + type: downstream + + - label: boolq + type: downstream + + - label: sciq + type: downstream + + - label: arc_easy + type: downstream + + - label: arc_challenge + type: downstream + + - label: mmlu_stem + type: downstream + + - label: mmlu_humanities + type: downstream + + - label: mmlu_social_sciences + type: downstream + + - label: mmlu_other + type: downstream + + - label: mmlu_stem_var + type: downstream + + - label: mmlu_humanities_var + type: downstream + + - label: mmlu_social_sciences_var + type: downstream + + - label: mmlu_other_var + type: downstream + + #- label: copa + # type: downstream + + #- label: rte + # type: downstream + + #- label: commitment_bank + # type: downstream + + #- label: mrpc + # type: downstream + + #- label: sst2 + # type: downstream + +data: + pad_direction: right + num_workers: 16 + drop_last: true + pin_memory: true + prefetch_factor: 1 + persistent_workers: true + timeout: 0 + paths: + ######### NON WEB DATA ######### + # ~> GUTENBERG BOOKS (5.256 GT) + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + # ~> PES2O STEM PAPERS (57.21 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> WIKIPEDIA & WIKIBOOKS (3.689 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + # ~> REDPAJAMA STACKEXCHANGE (19.63 GT) + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> REDPAJAMA ARXIV (19.63 GT) -- FIXME(review): the 26 paths below repeat the REDPAJAMA STACKEXCHANGE list verbatim (same redpajama_stackexchange_only prefix, same GT figure); confirm the intended arXiv location + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + -
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> PROOFPILE2 ALGEBRAIC STACK (12.623 GT) + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + # ~> PROOFPILE2 OPENWEBMATH (12.734 GT) + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + # ~> TULU FLAN V0 (1.84 GT) + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - 
s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy + #################################### + ######### CODE ######### + # ~> STARCODER (263.775 GT) + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00001.npy + #################################### + ######### WEB HIGH QUALITY ######### + # ~> C4 (174.418 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-66-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-67-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-68-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-69-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-70-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-71-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-72-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-73-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-74-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-75-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-76-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-77-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-78-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-79-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-80-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-81-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-82-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-83-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-84-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/c4/gpt-neox-olmo-dolma-v1_5/part-85-00000.npy + # ~> REDDIT (79.988 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-66-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-67-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-68-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-69-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-70-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-71-00000.npy + # ~> FALCON (547.341 GT) + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-007-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-007-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00003.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-224-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-224-00001.npy + #################################### + ######### WEB REST ######### + # ~> DOLMA CC HEAD 33% (192.264 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-040-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-070-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-071-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-075-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-085-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-097-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-102-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-104-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-110-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-112-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-117-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-124-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-125-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-128-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-129-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-132-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-141-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-149-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-152-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-156-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-159-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-164-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-167-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-169-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-172-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-173-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-174-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-186-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy + # ~> DOLMA CC MIDDLE 33% (189.606 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-040-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-070-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-071-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-075-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-085-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-097-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-102-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-104-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-110-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-112-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-117-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-124-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-125-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-128-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-129-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-132-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-141-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-149-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-152-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-156-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-159-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-164-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-167-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-169-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-172-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-173-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-174-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-186-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy + # ~> DOLMA CC TAIL 33% (294.252 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-040-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-070-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-071-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-075-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-085-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-097-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-102-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-104-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-110-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-112-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-117-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-124-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-125-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-128-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-129-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-132-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-141-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-149-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-152-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-156-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-159-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-164-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-167-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-169-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-172-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-173-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-174-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-186-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy \ No newline at end of file diff --git a/configs/olmo7-ablation-final2.yaml b/configs/olmo7-ablation-final2.yaml new file mode 100644 index 000000000..6e0b465e5 --- /dev/null +++ b/configs/olmo7-ablation-final2.yaml @@ -0,0 +1,1258 @@ +run_name: olmo7-ablation-final2 +seed: 61394 +dry_run: false + +wandb: + name: ${run_name} + project: olmo7-ablations + group: olmo7-ablation-final2 + +model: + d_model: 4096 + n_heads: 32 + n_layers: 32 + # mlp_ratio: 6 + mlp_hidden_size: 22016 + weight_tying: false + alibi: false + rope: true + flash_attention: true + attention_dropout: 0.0 + attention_layer_norm: false + multi_query_attention: false + include_bias: false + block_type: sequential + layer_norm_type: default + layer_norm_with_affine: false + 
bias_for_layer_norm: false + attention_layer_norm_with_affine: false + activation_type: swiglu + residual_dropout: 0.0 + embedding_dropout: 0.0 + max_sequence_length: 2048 + vocab_size: 50280 + embedding_size: 50304 + eos_token_id: 0 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + +compile: null + +optimizer: + name: adamw + learning_rate: 1.5e-4 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + metrics_log_interval: 10 + +scheduler: + name: linear_with_warmup + t_warmup: 1000 + alpha_f: 0.1 + +tokenizer: + identifier: tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json + truncate_direction: right + +save_folder: ${oc.env:CHECKPOINTS_PATH}/${oc.env:SLURM_JOB_ID,${run_name}} +save_overwrite: false +# Sharded checkpoints (best for restarts) +save_interval: 200 +save_num_checkpoints_to_keep: -1 +# Unsharded checkpoints (for final storage) +save_interval_unsharded: null +save_num_unsharded_checkpoints_to_keep: -1 + +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/hrshlkzq/step119000-unsharded/ # 0.5T +load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/j18wauyq/step238000-unsharded/ # 1.0T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/4xel5n7e/step358000-unsharded/ # 1.5T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/xtruaap8/step477000-unsharded/ # 2.0T + +no_pre_train_checkpoint: true +reset_optimizer_state: true +reset_trainer_state: true + +max_duration: 100e9T +global_train_batch_size: 2048 +device_train_microbatch_size: 2 +time_limit: null + +precision: amp_bf16 + +fsdp: + wrapping_strategy: by_block_and_size + precision: mixed + +max_grad_norm: 1.0 +max_grad_norm_ratio: null + +speed_monitor: + window_size: 20 + +eval_interval: ${save_interval} +eval_subset_num_batches: -1 +device_eval_batch_size: ${device_train_microbatch_size} +evaluators: + - label: all-small-ppl-validation + data: + num_workers: 0 + drop_last: true + # pin_memory: true + # prefetch_factor: 1 + # persistent_workers: false + # timeout: 0 + datasets: 
+ 4chan-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/4chan/val.npy + c4_100_domains-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_100_domains/val.npy + c4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_en/val.npy + gab-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/gab/val.npy + ice-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ice/val.npy + m2d2_s2orc-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_s2orc/val.npy + m2d2_wiki-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_wiki/val.npy + manosphere-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/manosphere/val.npy + mc4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/mc4_en/val.npy + pile-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/pile/val.npy + ptb-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ptb/val.npy + twitterAEE-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/twitterAEE/val.npy + wikitext_103-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/wikitext_103/val.npy + + ########################## + # Downstream evaluations # + ########################## + - label: piqa + type: downstream + + - label: hellaswag + type: downstream + + - label: winogrande + type: downstream + + - label: openbook_qa + type: downstream + + - label: boolq + type: downstream + + - label: sciq + type: downstream + + - label: arc_easy + type: downstream + + - label: arc_challenge + type: downstream + + - label: mmlu_stem + type: downstream + + - label: mmlu_humanities + type: downstream + + - label: mmlu_social_sciences + type: downstream + + - label: mmlu_other + type: downstream + + - label: mmlu_stem_var + type: downstream + + - label: mmlu_humanities_var + type: downstream + + - label: mmlu_social_sciences_var + type: 
downstream + + - label: mmlu_other_var + type: downstream + + #- label: copa + # type: downstream + + #- label: rte + # type: downstream + + #- label: commitment_bank + # type: downstream + + #- label: mrpc + # type: downstream + + #- label: sst2 + # type: downstream + +data: + pad_direction: right + num_workers: 16 + drop_last: true + pin_memory: true + prefetch_factor: 1 + persistent_workers: true + timeout: 0 + paths: + ######### NON WEB DATA ######### + # ~> GUTENBERG BOOKS (5.256 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + # ~> PES2O STEM PAPERS (57.21 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> WIKIPEDIA & WIKIBOOKS (3.689 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + # MEGAWIKA v1 (4.6 
GT) + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-01-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-02-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-03-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-04-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-05-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-06-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-07-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-08-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-09-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-10-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-11-00001.npy + - 
s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-12-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-13-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-14-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-16-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-17-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-18-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-19-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-20-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-21-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-22-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-23-00001.npy + - 
s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-23-00002.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-24-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-25-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-26-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-27-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-30-00001.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/megawika/v1/gpt-neox-olmo-dolma-v1_5/part-31-00001.npy + # ~> REDPAJAMA STACKEXCHANGE (19.63 GT) + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> REDPAJAMA ARXIV (19.63 GT) -- FIXME: the shards listed below are the redpajama_stackexchange_only paths again (same token count as the section above), not arXiv data; confirm the intended arXiv paths + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + -
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> PROOFPILE2 ALGEBRAIC STACK (12.623 GT) + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + # ~> PROOFPILE2 OPENWEBMATH (12.734 GT) + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + # ~> TULU FLAN V1 (16.5 GT; v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline) + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + -
s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - 
s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - 
s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - 
s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - 
s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy + - 
s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v2-decontaminated-60M-shots_all-upweight_1-dialog_false-sep_newline/train/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy + # ~> CC NEWS (14.3 GT) + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-0-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-0-00002.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-0-00003.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-1-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-1-00002.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-1-00003.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-2-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-2-00002.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-2-00003.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-3-00001.npy + - 
s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-3-00002.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-3-00003.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-4-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-4-00002.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-4-00003.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-5-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-6-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-6-00002.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-6-00003.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-7-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-7-00002.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-7-00003.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-8-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-8-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v3/gpt-neox-olmo-dolma-v1_5/part-8-00002.npy + #################################### + ######### CODE ######### + # ~> STARCODER (263.775 GT) + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy 
+ - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00001.npy + 
#################################### + ######### WEB HIGH QUALITY ######### + # ~> C4 (138.4 GT) + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-000-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-001-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-002-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-004-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-005-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-006-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-009-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-011-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-016-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-017-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-019-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-022-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-023-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-024-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-026-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-027-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-028-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-031-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-034-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-036-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-037-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-038-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-039-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-040-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-042-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-045-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-047-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-054-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-057-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-058-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-060-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-064-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-065-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-066-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-067-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-068-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-070-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-071-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-072-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-074-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-075-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-076-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-077-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-078-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-079-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-080-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-081-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-082-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-083-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-085-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-086-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-087-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-088-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-089-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-090-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-091-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-092-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-093-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-094-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-095-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-096-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-097-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-098-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-099-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-100-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-101-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-102-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-104-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-106-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-110-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-111-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-112-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-113-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-114-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-115-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-116-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-117-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-118-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-119-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-120-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-121-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-122-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-123-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-124-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-125-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-128-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-129-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-130-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-131-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-132-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-133-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-137-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-138-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-140-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-141-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-143-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-145-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-146-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-147-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-148-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-149-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-150-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-151-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-152-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-153-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-154-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-155-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-156-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-157-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-158-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-159-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-160-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-161-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-162-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-163-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-164-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-165-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-166-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-167-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-168-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-169-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001-fix/gpt-neox-olmo-dolma-v1_5/part-170-00000.npy + # ~> REDDIT (79.9 GT) + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - 
s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - 
s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - 
s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy + - 
s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-66-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-67-00000.npy + - 
s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-68-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-69-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-70-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-71-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-72-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-73-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-74-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-75-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-76-00000.npy + - s3://ai2-llm/preprocessed/reddit/v5-dedupe-pii-nsfw-toxic-fuzzydd-length/gpt-neox-olmo-dolma-v1_5/part-77-00000.npy + # ~> FALCON (547.341 GT) + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-000-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-001-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-002-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-004-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-005-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-006-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-009-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-011-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-016-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-017-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-019-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-022-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-023-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-024-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-026-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-027-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-028-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-031-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-034-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-036-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-037-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-038-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-039-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-040-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-042-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-045-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-047-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-054-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-057-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-058-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-060-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-064-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-065-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-066-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-067-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-068-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-070-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-071-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-072-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-074-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-075-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-076-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-077-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-078-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-079-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-080-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-081-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-082-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-083-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-085-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-086-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-087-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-088-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-089-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-090-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-091-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-092-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-093-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-094-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-095-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-096-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-097-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-098-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-099-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-100-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-101-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-102-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-104-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-106-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-110-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-111-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-112-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-113-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-114-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-115-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-116-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-117-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-118-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-119-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-120-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-121-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-122-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-123-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-124-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-125-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-128-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-129-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-130-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-131-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-132-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-133-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-137-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-138-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-140-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-141-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-143-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-145-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-146-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-147-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-148-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-149-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-150-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-151-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-152-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-153-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-154-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-155-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-156-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-157-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-158-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-159-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-160-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-161-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-162-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-163-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-164-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-165-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-166-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-167-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-168-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-169-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-170-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-171-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-172-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-173-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-174-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-175-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-176-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-178-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-179-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-180-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-181-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-182-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-183-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-184-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-185-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-186-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v2-frac_005_100-qc_cc_multi_bin-paloma-rep-pii/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy + #################################### + ######### WEB REST ######### + # ~> DOLMA CC HEAD 50% (178.4 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-000-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-001-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-002-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-004-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-005-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-006-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-006-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-009-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-011-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-016-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-017-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-019-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-022-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-023-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-024-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-026-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-027-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-028-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-031-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-034-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-036-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-037-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-038-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-039-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-040-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-042-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-045-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-047-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-054-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-057-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-058-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-060-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy + # ~> DOLMA CC MIDDLE 33% (242.05 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-000-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-001-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-002-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-004-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-005-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-006-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-009-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-011-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-016-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-017-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-019-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-022-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-023-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-024-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-026-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-027-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-028-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-031-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-034-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-036-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-037-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-038-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-039-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-040-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-042-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-045-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-047-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-054-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-057-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-058-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-060-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-064-00000.npy + # ~> DOLMA CC TAIL 33% (191.4 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-000-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-001-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-002-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-004-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-005-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-006-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-009-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-011-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-016-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-017-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-019-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-022-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-023-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-024-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-026-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-027-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-028-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-031-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-034-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-036-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-037-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-038-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-039-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-040-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-042-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-043-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-045-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-046-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-047-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-052-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-054-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-056-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-057-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-058-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-060-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-063-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-064-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-065-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-066-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-067-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-068-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-070-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-071-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-072-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-074-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-075-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-076-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-077-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-078-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-079-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-080-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-081-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-082-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-083-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-085-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-086-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-087-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-088-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-089-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-090-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-091-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7-dd_ngram_dp_030-qc_cc_en_bin_001/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-092-00000.npy \ No newline at end of file diff --git a/configs/olmo7-ablation-refheavy.yaml b/configs/olmo7-ablation-refheavy.yaml new file mode 100644 index 000000000..e764892c9 --- /dev/null +++ b/configs/olmo7-ablation-refheavy.yaml @@ -0,0 +1,1704 @@ +run_name: olmo7-ablation-refheavy +seed: 61394 +dry_run: false + +wandb: + name: ${run_name} + project: olmo7-ablations + group: olmo7-ablation-refheavy + +model: + d_model: 4096 + n_heads: 32 + n_layers: 32 + # mlp_ratio: 6 + mlp_hidden_size: 22016 + weight_tying: false + alibi: false + rope: true + flash_attention: true + attention_dropout: 0.0 + attention_layer_norm: false + multi_query_attention: false + include_bias: false + block_type: sequential + layer_norm_type: default + layer_norm_with_affine: false + bias_for_layer_norm: false + attention_layer_norm_with_affine: false + activation_type: swiglu + residual_dropout: 0.0 + embedding_dropout: 0.0 + max_sequence_length: 2048 + vocab_size: 50280 + embedding_size: 50304 + eos_token_id: 0 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + +compile: null + +optimizer: + name: adamw + learning_rate: 1.5e-4 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + metrics_log_interval: 10 + +scheduler: + 
name: linear_with_warmup + t_warmup: 1000 + alpha_f: 0.1 + +tokenizer: + identifier: tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json + truncate_direction: right + +save_folder: ${oc.env:CHECKPOINTS_PATH}/${oc.env:SLURM_JOB_ID,${run_name}} +save_overwrite: false +# Sharded checkpoints (best for restarts) +save_interval: 200 +save_num_checkpoints_to_keep: -1 +# Unsharded checkpoints (for final storage) +save_interval_unsharded: null +save_num_unsharded_checkpoints_to_keep: -1 + +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/hrshlkzq/step119000-unsharded/ # 0.5T +load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/j18wauyq/step238000-unsharded/ # 1.0T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/4xel5n7e/step358000-unsharded/ # 1.5T +#load_path: r2://olmo-checkpoints/ai2-llm/olmo-medium/xtruaap8/step477000-unsharded/ # 2.0T + +no_pre_train_checkpoint: true +reset_optimizer_state: true +reset_trainer_state: true + +max_duration: 100e9T +global_train_batch_size: 2048 +device_train_microbatch_size: 2 +time_limit: null + +precision: amp_bf16 + +fsdp: + wrapping_strategy: by_block_and_size + precision: mixed + +max_grad_norm: 1.0 +max_grad_norm_ratio: null + +speed_monitor: + window_size: 20 + +eval_interval: ${save_interval} +eval_subset_num_batches: -1 +device_eval_batch_size: ${device_train_microbatch_size} +evaluators: + - label: all-small-ppl-validation + data: + num_workers: 0 + drop_last: true + # pin_memory: true + # prefetch_factor: 1 + # persistent_workers: false + # timeout: 0 + datasets: + 4chan-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/4chan/val.npy + c4_100_domains-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_100_domains/val.npy + c4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_en/val.npy + gab-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/gab/val.npy + ice-validation: + - 
s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ice/val.npy + m2d2_s2orc-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_s2orc/val.npy + m2d2_wiki-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_wiki/val.npy + manosphere-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/manosphere/val.npy + mc4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/mc4_en/val.npy + pile-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/pile/val.npy + ptb-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ptb/val.npy + twitterAEE-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/twitterAEE/val.npy + wikitext_103-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/wikitext_103/val.npy + + ########################## + # Downstream evaluations # + ########################## + - label: piqa + type: downstream + + - label: hellaswag + type: downstream + + - label: winogrande + type: downstream + + - label: openbook_qa + type: downstream + + - label: boolq + type: downstream + + - label: sciq + type: downstream + + - label: arc_easy + type: downstream + + - label: arc_challenge + type: downstream + + - label: mmlu_stem + type: downstream + + - label: mmlu_humanities + type: downstream + + - label: mmlu_social_sciences + type: downstream + + - label: mmlu_other + type: downstream + + - label: mmlu_stem_var + type: downstream + + - label: mmlu_humanities_var + type: downstream + + - label: mmlu_social_sciences_var + type: downstream + + - label: mmlu_other_var + type: downstream + + #- label: copa + # type: downstream + + #- label: rte + # type: downstream + + #- label: commitment_bank + # type: downstream + + #- label: mrpc + # type: downstream + + #- label: sst2 + # type: downstream + +data: + pad_direction: right + num_workers: 16 + drop_last: true + pin_memory: true + prefetch_factor: 1 + persistent_workers: true + 
timeout: 0 + paths: + ######### NON WEB DATA ######### + # ~> GUTENBERG BOOKS (5.256 GT x 3) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + # ~> PES2O STEM PAPERS (57.21 GT x 2) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> WIKIPEDIA & WIKIBOOKS (3.689 GT x 3) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + # ~> REDPAJAMA STACKEXCHANGE (19.63 GT x 2) + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> CC NEWS (15 GT x 2) + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-0-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-1-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-3-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-4-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-5-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-6-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-7-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-8-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-8-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-0-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-1-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - 
s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-3-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-4-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-5-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-6-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-7-00001.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-8-00000.npy + - s3://ai2-llm/preprocessed/cc-news/v2/gpt-neox-olmo-dolma-v1_5/part-8-00001.npy + # ~> REDPAJAMA ARXIV (19.63 GT x 2) -- NOTE(review): the paths in this section repeat the redpajama_stackexchange_only shards and the StackExchange token count; confirm the intended arXiv shard location and size + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - 
s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + # ~> PROOFPILE2 ALGEBRAIC STACK (12.623 GT) + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/algebraic-stack/train/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + # ~> PROOFPILE2 OPENWEBMATH (12.734 GT x 2) + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - 
s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/proof-pile-2/v0_decontaminated/open-web-math/train/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + # ~> TULU FLAN V0 (1.84 GT x 5) + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - 
s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy + - 
s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-3-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-4-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-5-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-6-00000.npy + - s3://ai2-llm/preprocessed/tulu_flan/v0_all_train/gpt-neox-olmo-dolma-v1_5/part-7-00000.npy + #################################### + ######### CODE ######### + # ~> STARCODER (263.775 GT) + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-00-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-03-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-04-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-05-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-06-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-07-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-08-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-09-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-10-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-11-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-12-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-13-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-14-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-15-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-16-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-17-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-18-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-19-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-20-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-21-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-22-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-23-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-24-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-25-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-26-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-27-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-30-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-31-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-32-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-33-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-34-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-35-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-36-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-37-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-38-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-39-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-40-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-41-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-42-00001.npy + - 
s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-43-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-44-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-46-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-47-00001.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/starcoder/v0_decontaminated_doc_only/gpt-neox-olmo-dolma-v1_5/part-48-00001.npy + #################################### + ######### WEB HIGH QUALITY ######### + # ~> C4 (157.2 GT) + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-66-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-67-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-68-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-69-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-70-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-71-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-72-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-73-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-74-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-75-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-76-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-77-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-78-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-79-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-80-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-81-00000.npy + - 
s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-82-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-83-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-84-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-85-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-86-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-87-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-88-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-89-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-90-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-91-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-92-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-93-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-94-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-95-00000.npy + - s3://ai2-llm/preprocessed/c4/v1_dd_ngram_docpara_le030/gpt-neox-olmo-dolma-v1_5/part-96-00000.npy + # ~> REDDIT (79.988 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-07-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-08-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-09-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-10-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-11-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-12-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-13-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-14-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-15-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-16-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-17-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-18-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-19-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-20-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-21-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-22-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-23-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-24-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-25-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-26-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-27-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-28-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-29-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-30-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-31-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-32-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-33-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-34-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-35-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-36-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-37-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-38-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-39-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-40-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-41-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-42-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-43-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-44-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-45-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-46-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-47-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-48-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-49-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-50-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-51-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-52-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-53-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-54-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-55-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-56-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-57-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-58-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-59-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-60-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-61-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-62-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-63-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-64-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-65-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-66-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-67-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-68-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-69-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-70-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/reddit/gpt-neox-olmo-dolma-v1_5/part-71-00000.npy + # ~> FALCON (547.341 GT) + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-000-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-001-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-002-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-003-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-004-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-005-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-006-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-007-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-007-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-008-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-009-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-010-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-011-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-012-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-013-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-014-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-015-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-016-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-017-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-018-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-019-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-020-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-021-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-022-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-023-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-024-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-025-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-026-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-027-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-028-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-029-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-030-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-031-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-032-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-033-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-034-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-035-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-036-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-037-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-038-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-039-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-040-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-041-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-042-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-043-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-044-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-045-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-046-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-047-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-048-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-049-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-050-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-051-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-052-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-053-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-054-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-055-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-056-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-057-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-058-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-059-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-060-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-061-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-062-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-063-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-064-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-065-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-066-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-067-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-068-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-069-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-070-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-071-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-072-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-073-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-074-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-075-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-076-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-077-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-078-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-079-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-080-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-081-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-082-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-083-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-084-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-085-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-086-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-087-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-088-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-089-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-090-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-091-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-092-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-093-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-094-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-095-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-096-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-097-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-098-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-099-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-100-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-101-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-102-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-103-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-104-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-105-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-106-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-107-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-108-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-109-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-110-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-111-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-112-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-113-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-114-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-115-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-116-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-117-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-118-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-119-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-120-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-121-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-122-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-123-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-124-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-125-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-126-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-127-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-128-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-129-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-130-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-131-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-132-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-133-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-134-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-135-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-136-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-137-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-138-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-139-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-140-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-141-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-142-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-143-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-144-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-145-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-146-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-147-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-148-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-149-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-150-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-151-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-152-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-153-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-154-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-155-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-156-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-157-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-158-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-159-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-160-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-161-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-162-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-163-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-164-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-165-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-166-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-167-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-168-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-169-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-170-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-171-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-172-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-173-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-174-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-175-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-176-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-177-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-178-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-179-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-180-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-181-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-182-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-183-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-184-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-185-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-186-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-187-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-188-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-189-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-190-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-191-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-192-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-193-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-194-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-195-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-196-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-197-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-198-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-199-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-200-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-201-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-202-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-203-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-204-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-205-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-206-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-207-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-208-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-209-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-210-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-211-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-212-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00001.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-213-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-214-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-215-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-216-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-217-00002.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-218-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-219-00003.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-220-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-221-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00000.npy + - 
s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-222-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00001.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-223-00002.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-224-00000.npy + - s3://ai2-llm/preprocessed/falcon-refinedweb/v0-0.05-heldout-complement_decon_ppl_suite_v3/gpt-neox-20b-pii-special/part-224-00001.npy + #################################### + ######### WEB REST ######### + # ~> DOLMA CC HEAD 35% (127.9 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-011-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-016-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-024-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-028-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-035-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-038-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-039-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-040-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-042-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-055-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-060-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-070-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-073-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-076-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-082-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-087-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-088-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-092-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-094-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-095-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-099-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-101-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-108-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-110-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-124-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-129-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-140-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-146-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-152-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-155-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-159-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-162-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-163-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-166-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-170-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-171-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-181-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-182-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-184-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-185-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_head/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy + # ~> DOLMA CC MIDDLE 35% (164.5 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-003-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-005-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-010-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-014-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-020-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-023-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-027-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-030-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-033-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-050-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-059-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-061-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-069-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-074-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-077-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-079-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-080-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-088-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-091-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-096-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-099-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-100-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-109-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-110-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-116-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-119-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-124-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-128-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-133-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-138-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-144-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-145-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-146-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-148-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-149-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-150-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-151-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-152-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-158-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-161-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-164-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-166-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-167-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-168-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-170-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-171-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-173-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-180-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_middle/gpt-neox-olmo-dolma-v1_5/part-186-00000.npy + # ~> DOLMA CC TAIL 35% (179.1 GT) + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-001-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-006-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-008-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-009-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-013-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-017-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-018-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-022-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-029-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-034-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-041-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-042-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-049-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-051-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-058-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-062-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-065-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-072-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-076-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-077-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-078-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-081-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-084-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-085-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-092-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-096-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-100-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-102-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-103-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-104-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-106-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-107-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-110-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-112-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-114-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-115-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-120-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-126-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-128-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-131-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-134-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-135-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-136-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-137-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-139-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-142-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-146-00000.npy + - 
s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-147-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-151-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-153-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-154-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-162-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-165-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-170-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-172-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-173-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-174-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-177-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-180-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-181-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_7_dd_ngram_docpara_le030_decontam/cc_en_tail/gpt-neox-olmo-dolma-v1_5/part-187-00000.npy \ No newline at end of file diff --git a/olmo/eval/downstream.py b/olmo/eval/downstream.py index b81f7927a..09df95de0 100644 --- a/olmo/eval/downstream.py +++ 
b/olmo/eval/downstream.py @@ -1163,6 +1163,7 @@ def __init__( dataset_name=None, split="validation", prompt_variations=None, + mc_labels=False, ): dataset_names = [] # Collect the relevant categories @@ -1178,9 +1179,15 @@ def __init__( if dataset_name in cats: dataset_names.append(name) self.dev_set = {} + self.mc_labels = mc_labels prompts: List[Union[None, str]] = [None] - if prompt_variations == 1: - prompts = [None, "inst", "inst+1", "inst+2", "inst+3", "inst+4", "inst+5"] + if prompt_variations is not None: + if prompt_variations == 1: + prompts = [None, "inst", "inst+1", "inst+2", "inst+3", "inst+4", "inst+5"] + elif prompt_variations == 2: + prompts = ["inst+5"] + else: + raise ValueError(f"Unknown prompt variations: {prompt_variations}") # Need to grab the dev set for the few-shot prompts for name in dataset_names: self.dev_set[name] = datasets.load_dataset( @@ -1195,7 +1202,20 @@ def __init__( ) def doc_to_text(self, doc): - output_text = "Question: " + doc["question"] + "\nAnswer:" + def format_example(doc, keys): + question_prefix = "" + if not self.mc_labels: + question_prefix = "Question: " # To make context more clear + question = question_prefix + doc["question"].strip() + choices = "" + if self.mc_labels: + choices = "".join([f"{key}. 
{choice}\n" for key, choice in zip(keys, doc["choices"])]) + prompt = f"{question}\n{choices}Answer:" + return prompt + + keys = ["A", "B", "C", "D"] + output_text = format_example(doc, keys) + if self.current_prompt is not None: prefix = "" if "inst" in self.current_prompt: @@ -1208,13 +1228,18 @@ def doc_to_text(self, doc): for idx, dev_doc in enumerate(dev_set): if idx >= num_shots_int: break - answer = dev_doc["choices"][dev_doc["answer"]] - prefix += "Question: " + dev_doc["question"] + "\nAnswer: " + answer + "\n\n" + if self.mc_labels: + answer = keys[dev_doc["answer"]] + else: + answer = dev_doc["choices"][dev_doc["answer"]] + prefix += format_example(dev_doc, keys) + " " + answer + "\n\n" output_text = prefix + output_text return output_text def doc_to_continuations(self, doc): # add spaces in front of continuation + if self.mc_labels: + return [" A", " B", " C", " D"] return [" " + choice for choice in doc["choices"]] def doc_to_label(self, doc): @@ -1254,4 +1279,27 @@ def doc_to_domain_conditional(self, doc): "mmlu_humanities_var": (MMLU, {"dataset_name": "humanities", "prompt_variations": 1}), "mmlu_social_sciences_var": (MMLU, {"dataset_name": "social_sciences", "prompt_variations": 1}), "mmlu_other_var": (MMLU, {"dataset_name": "other", "prompt_variations": 1}), + "mmlu_stem_mc_5shot": (MMLU, {"dataset_name": "stem", "prompt_variations": 2, "mc_labels": True}), + "mmlu_humanities_mc_5shot": (MMLU, {"dataset_name": "humanities", "prompt_variations": 2, "mc_labels": True}), + "mmlu_social_sciences_mc_5shot": ( + MMLU, + {"dataset_name": "social_sciences", "prompt_variations": 2, "mc_labels": True}, + ), + "mmlu_other_mc_5shot": (MMLU, {"dataset_name": "other", "prompt_variations": 2, "mc_labels": True}), + "mmlu_stem_mc_5shot_test": ( + MMLU, + {"dataset_name": "stem", "split": "test", "prompt_variations": 2, "mc_labels": True}, + ), + "mmlu_humanities_mc_5shot_test": ( + MMLU, + {"dataset_name": "humanities", "split": "test", "prompt_variations": 2, 
"mc_labels": True}, + ), + "mmlu_social_sciences_mc_5shot_test": ( + MMLU, + {"dataset_name": "social_sciences", "split": "test", "prompt_variations": 2, "mc_labels": True}, + ), + "mmlu_other_mc_5shot_test": ( + MMLU, + {"dataset_name": "other", "split": "test", "prompt_variations": 2, "mc_labels": True}, + ), } diff --git a/olmo/model.py b/olmo/model.py index 882f7d6e8..555e0ca81 100644 --- a/olmo/model.py +++ b/olmo/model.py @@ -425,10 +425,10 @@ def __init__(self, layer_id: int, config: ModelConfig, cache: BufferCache): self.k_norm: Optional[LayerNormBase] = None self.q_norm: Optional[LayerNormBase] = None if config.attention_layer_norm: - assert config.n_kv_heads is not None + assert config.effective_n_kv_heads is not None self.k_norm = LayerNormBase.build( config, - size=config.d_model // config.effective_n_kv_heads, + size=(config.d_model // config.n_heads) * config.effective_n_kv_heads, elementwise_affine=config.attention_layer_norm_with_affine, ) self.q_norm = LayerNormBase.build(config, elementwise_affine=config.attention_layer_norm_with_affine) diff --git a/olmo/train.py b/olmo/train.py index 1494a1b49..4454786e3 100644 --- a/olmo/train.py +++ b/olmo/train.py @@ -105,7 +105,7 @@ def cross_entropy_loss( z_squared = logits.logsumexp(-1).pow(2) if reduction == "mean": - z_squared = z_squared / (labels != ignore_index).mean() + z_squared = (z_squared * (labels != ignore_index)).mean() elif reduction == "sum": z_squared = (z_squared * (labels != ignore_index)).sum() diff --git a/olmo/util.py b/olmo/util.py index dbab77b38..1e33808ef 100644 --- a/olmo/util.py +++ b/olmo/util.py @@ -511,12 +511,12 @@ def _s3_upload( _wait_before_retry(attempt) if err is not None: - raise OLMoNetworkError("Failed to check object existence during s3 upload") from err + raise OLMoNetworkError(f"Failed to check object existence during {scheme} upload") from err try: _get_s3_client(scheme).upload_file(source, bucket_name, key) except boto_exceptions.ClientError as e: - raise 
OLMoNetworkError("Failed to upload to s3") from e + raise OLMoNetworkError(f"Failed to upload to {scheme}") from e def _s3_file_size(scheme: str, bucket_name: str, key: str, max_attempts: int = 3) -> int: @@ -533,7 +533,7 @@ def _s3_file_size(scheme: str, bucket_name: str, key: str, max_attempts: int = 3 log.warning("%s failed attempt %d with retriable error: %s", _s3_file_size.__name__, attempt, err) _wait_before_retry(attempt) - raise OLMoNetworkError("Failed to get s3 file size") from err + raise OLMoNetworkError(f"Failed to get {scheme} file size") from err def _s3_get_bytes_range( @@ -551,7 +551,7 @@ def _s3_get_bytes_range( ) except boto_exceptions.ClientError as e: if e.response["ResponseMetadata"]["HTTPStatusCode"] == 404: - raise FileNotFoundError(f"s3://{bucket_name}/{key}") from e + raise FileNotFoundError(f"{scheme}://{bucket_name}/{key}") from e err = e except (boto_exceptions.HTTPClientError, boto_exceptions.ConnectionError) as e: # ResponseStreamingError (subclass of HTTPClientError) can happen as @@ -572,7 +572,7 @@ def _s3_get_bytes_range( # This can cause an irrelevant exception (e.g. KeyError: 'error'), resulting # in us losing the true exception info. To avoid this, we change the exception # to a type that has a single-parameter constructor. - raise OLMoNetworkError("Failed to get bytes range from s3") from err + raise OLMoNetworkError(f"Failed to get bytes range from {scheme}") from err def _s3_find_latest_checkpoint(scheme: str, bucket_name: str, prefix: str) -> Optional[str]: @@ -600,7 +600,7 @@ def _s3_find_latest_checkpoint(scheme: str, bucket_name: str, prefix: str) -> Op # We prioritize sharded checkpoints over unsharded ones. 
if step > latest_step or (step == latest_step and not checkpoint_name.endswith("-unsharded")): latest_step = step - latest_checkpoint = f"s3://ai2-llm/{prefix}" + latest_checkpoint = f"{scheme}://ai2-llm/{prefix}" return latest_checkpoint diff --git a/scripts/beaker/olmo7-ablation-baseline.sh b/scripts/beaker/olmo7-ablation-baseline.sh new file mode 100755 index 000000000..cd64e59e1 --- /dev/null +++ b/scripts/beaker/olmo7-ablation-baseline.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +set -ex + +CONFIG_PATH=configs/olmo7-ablation-baseline.yaml +NUM_NODES=8 +ARGS='--run_name=olmo7-ablation-baseline --wandb.name=baseline --model.flash_attention=true --fsdp.wrapping_strategy=by_block_and_size --fsdp.sharding_strategy=SHARD_GRAD_OP --save_folder=runs/ --device_train_microbatch_size=3 --global_train_batch_size=6144 --wandb.group=baseline --remote_save_folder=s3://ai2-llm/checkpoints/olmo7-ablation/baseline3 --load_path=s3://ai2-llm/checkpoints/olmo7-ablation/baseline3/step7800' + +gantry run \ + --allow-dirty \ + --workspace ai2/llm-testing \ + --task-name olmo7-ablation-baseline \ + --description olmo7-ablation-baseline \ + --priority high \ + --beaker-image olmo-torch2-gantry \ + --cluster ai2/pluto-cirrascale \ + --gpus 8 \ + --replicas "${NUM_NODES}" \ + --leader-selection \ + --host-networking \ + --nfs \ + --mount /net/nfs.cirrascale/allennlp/petew/cache:/root/.cache \ + --budget ai2/oe-training \ + --env LOG_FILTER_TYPE=local_rank0_only \ + --env OMP_NUM_THREADS=8 \ + --env OLMO_TASK=model \ + --env-secret WANDB_API_KEY=WANDB_API_KEY \ + --env-secret AWS_ACCESS_KEY_ID=AWS_ACCESS_KEY_ID \ + --env-secret AWS_SECRET_ACCESS_KEY=AWS_SECRET_ACCESS_KEY \ + --env-secret R2_ACCESS_KEY_ID=R2_ACCESS_KEY_ID \ + --env-secret R2_SECRET_ACCESS_KEY=R2_SECRET_ACCESS_KEY \ + --env-secret R2_ENDPOINT_URL=R2_ENDPOINT_URL \ + --shared-memory 10GiB \ + --venv base \ + --yes \ + -- /bin/bash -c "source scripts/beaker/warm_hf_cache.sh && torchrun --nnodes ${NUM_NODES}:${NUM_NODES} 
--nproc-per-node 8 --rdzv_id=101 --rdzv_backend=c10d --rdzv_endpoint=\$BEAKER_LEADER_REPLICA_HOSTNAME:29400 scripts/train.py ${CONFIG_PATH} ${ARGS}" diff --git a/scripts/beaker/olmo7-ablation-dedupeparas.sh b/scripts/beaker/olmo7-ablation-dedupeparas.sh new file mode 100755 index 000000000..0f9e6badf --- /dev/null +++ b/scripts/beaker/olmo7-ablation-dedupeparas.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +set -ex + +CONFIG_PATH=configs/olmo7-ablation-dedupeparas.yaml +NUM_NODES=8 +ARGS='--run_name=olmo7-ablation-dedupeparas --wandb.name=dedupeparas --model.flash_attention=true --fsdp.wrapping_strategy=by_block_and_size --fsdp.sharding_strategy=SHARD_GRAD_OP --save_folder=runs/ --device_train_microbatch_size=3 --global_train_batch_size=6144 --wandb.group=dedupeparas --remote_save_folder=s3://ai2-llm/checkpoints/olmo7-ablation/dedupeparas' + +gantry run \ + --allow-dirty \ + --workspace ai2/llm-testing \ + --task-name olmo7-ablation-dedupeparas \ + --description olmo7-ablation-dedupeparas \ + --priority high \ + --beaker-image olmo-torch2-gantry \ + --cluster ai2/pluto-cirrascale \ + --gpus 8 \ + --replicas "${NUM_NODES}" \ + --leader-selection \ + --host-networking \ + --nfs \ + --mount /net/nfs.cirrascale/allennlp/petew/cache:/root/.cache \ + --budget ai2/oe-training \ + --env LOG_FILTER_TYPE=local_rank0_only \ + --env OMP_NUM_THREADS=8 \ + --env OLMO_TASK=model \ + --env-secret WANDB_API_KEY=WANDB_API_KEY \ + --env-secret AWS_ACCESS_KEY_ID=AWS_ACCESS_KEY_ID \ + --env-secret AWS_SECRET_ACCESS_KEY=AWS_SECRET_ACCESS_KEY \ + --env-secret R2_ACCESS_KEY_ID=R2_ACCESS_KEY_ID \ + --env-secret R2_SECRET_ACCESS_KEY=R2_SECRET_ACCESS_KEY \ + --env-secret R2_ENDPOINT_URL=R2_ENDPOINT_URL \ + --shared-memory 10GiB \ + --venv base \ + --yes \ + -- /bin/bash -c "source scripts/beaker/warm_hf_cache.sh && torchrun --nnodes ${NUM_NODES}:${NUM_NODES} --nproc-per-node 8 --rdzv_id=101 --rdzv_backend=c10d --rdzv_endpoint=\$BEAKER_LEADER_REPLICA_HOSTNAME:29400 scripts/train.py 
${CONFIG_PATH} ${ARGS}" diff --git a/scripts/beaker/olmo7-ablation-final2.sh b/scripts/beaker/olmo7-ablation-final2.sh new file mode 100755 index 000000000..3fbf72573 --- /dev/null +++ b/scripts/beaker/olmo7-ablation-final2.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +set -ex + +CONFIG_PATH=configs/olmo7-ablation-final2.yaml +NUM_NODES=8 +ARGS='--run_name=olmo7-ablation-final2 --wandb.name=final2 --model.flash_attention=true --fsdp.wrapping_strategy=by_block_and_size --fsdp.sharding_strategy=SHARD_GRAD_OP --save_folder=runs/ --device_train_microbatch_size=3 --global_train_batch_size=6144 --wandb.group=final2 --remote_save_folder=s3://ai2-llm/checkpoints/olmo7-ablation/final2' + +gantry run \ + --allow-dirty \ + --workspace ai2/llm-testing \ + --task-name olmo7-ablation-final2 \ + --description olmo7-ablation-final2 \ + --priority high \ + --beaker-image olmo-torch2-gantry \ + --cluster ai2/pluto-cirrascale \ + --gpus 8 \ + --replicas "${NUM_NODES}" \ + --leader-selection \ + --host-networking \ + --nfs \ + --mount /net/nfs.cirrascale/allennlp/petew/cache:/root/.cache \ + --budget ai2/oe-training \ + --env LOG_FILTER_TYPE=local_rank0_only \ + --env OMP_NUM_THREADS=8 \ + --env OLMO_TASK=model \ + --env-secret WANDB_API_KEY=WANDB_API_KEY \ + --env-secret AWS_ACCESS_KEY_ID=AWS_ACCESS_KEY_ID \ + --env-secret AWS_SECRET_ACCESS_KEY=AWS_SECRET_ACCESS_KEY \ + --env-secret R2_ACCESS_KEY_ID=R2_ACCESS_KEY_ID \ + --env-secret R2_SECRET_ACCESS_KEY=R2_SECRET_ACCESS_KEY \ + --env-secret R2_ENDPOINT_URL=R2_ENDPOINT_URL \ + --shared-memory 10GiB \ + --venv base \ + --yes \ + -- /bin/bash -c "source scripts/beaker/warm_hf_cache.sh && torchrun --nnodes ${NUM_NODES}:${NUM_NODES} --nproc-per-node 8 --rdzv_id=101 --rdzv_backend=c10d --rdzv_endpoint=\$BEAKER_LEADER_REPLICA_HOSTNAME:29400 scripts/train.py ${CONFIG_PATH} ${ARGS}" diff --git a/scripts/beaker/olmo7-ablation-refheavy.sh b/scripts/beaker/olmo7-ablation-refheavy.sh new file mode 100755 index 000000000..fe1c61aa1 --- /dev/null +++ 
b/scripts/beaker/olmo7-ablation-refheavy.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +set -ex + +CONFIG_PATH=configs/olmo7-ablation-refheavy.yaml +NUM_NODES=8 +ARGS='--run_name=olmo7-ablation-refheavy --wandb.name=refheavy --model.flash_attention=true --fsdp.wrapping_strategy=by_block_and_size --fsdp.sharding_strategy=SHARD_GRAD_OP --save_folder=runs/ --device_train_microbatch_size=3 --global_train_batch_size=6144 --wandb.group=refheavy --remote_save_folder=s3://ai2-llm/checkpoints/olmo7-ablation/refheavy' + +gantry run \ + --allow-dirty \ + --workspace ai2/llm-testing \ + --task-name olmo7-ablation-refheavy \ + --description olmo7-ablation-refheavy \ + --priority high \ + --beaker-image olmo-torch2-gantry \ + --cluster ai2/pluto-cirrascale \ + --gpus 8 \ + --replicas "${NUM_NODES}" \ + --leader-selection \ + --host-networking \ + --nfs \ + --mount /net/nfs.cirrascale/allennlp/petew/cache:/root/.cache \ + --budget ai2/oe-training \ + --env LOG_FILTER_TYPE=local_rank0_only \ + --env OMP_NUM_THREADS=8 \ + --env OLMO_TASK=model \ + --env-secret WANDB_API_KEY=WANDB_API_KEY \ + --env-secret AWS_ACCESS_KEY_ID=AWS_ACCESS_KEY_ID \ + --env-secret AWS_SECRET_ACCESS_KEY=AWS_SECRET_ACCESS_KEY \ + --env-secret R2_ACCESS_KEY_ID=R2_ACCESS_KEY_ID \ + --env-secret R2_SECRET_ACCESS_KEY=R2_SECRET_ACCESS_KEY \ + --env-secret R2_ENDPOINT_URL=R2_ENDPOINT_URL \ + --shared-memory 10GiB \ + --venv base \ + --yes \ + -- /bin/bash -c "source scripts/beaker/warm_hf_cache.sh && torchrun --nnodes ${NUM_NODES}:${NUM_NODES} --nproc-per-node 8 --rdzv_id=101 --rdzv_backend=c10d --rdzv_endpoint=\$BEAKER_LEADER_REPLICA_HOSTNAME:29400 scripts/train.py ${CONFIG_PATH} ${ARGS}" diff --git a/scripts/beaker/warm_hf_cache.sh b/scripts/beaker/warm_hf_cache.sh index cd8b77232..6e5eaae41 100755 --- a/scripts/beaker/warm_hf_cache.sh +++ b/scripts/beaker/warm_hf_cache.sh @@ -6,4 +6,4 @@ mkdir -p /root/.cache pushd /root/.cache curl "https://storage.googleapis.com/dirkgr-public/huggingface_cache_v2.tar.gz" | tar 
--keep-newer-files -xzf - popd -export HF_DATASETS_OFFLINE=1 \ No newline at end of file +export HF_DATASETS_OFFLINE=1 diff --git a/scripts/lumi/olmo7-ablations.sh b/scripts/lumi/olmo7-ablations.sh new file mode 100644 index 000000000..e91b6cce2 --- /dev/null +++ b/scripts/lumi/olmo7-ablations.sh @@ -0,0 +1,65 @@ +#!/bin/bash +#SBATCH --job-name=olmo7-ablation +#SBATCH --account=project_462000229 +#SBATCH --output=/pfs/lustref1/flash/project_462000229/logs/%j.log +#SBATCH --nodes=128 # Total number of nodes +#SBATCH --ntasks-per-node=8 +#SBATCH --gpus-per-node=8 # Allocate one gpu per MPI rank +#SBATCH --cpus-per-task=6 +#SBATCH --time=39:15:00 +#SBATCH --mem=0 # All memory on the node +#SBATCH --partition=standard-g + +WANDB_GROUP=$1 +shift + +export OLMO_CONTAINER=llm-lumi-torch21_latest.sif + +export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK +export MPICH_GPU_SUPPORT_ENABLED=1 +export NCCL_SOCKET_IFNAME=hsn +export NCCL_NET_GDR_LEVEL=3 +export MIOPEN_USER_DB_PATH=/tmp/${USER}-miopen-cache-${SLURM_JOB_ID} +export MIOPEN_CUSTOM_CACHE_DIR=${MIOPEN_USER_DB_PATH} +export CXI_FORK_SAFE=1 +export CXI_FORK_SAFE_HP=1 +export FI_CXI_DISABLE_CQ_HUGETLB=1 + +# We need to set this to avoid "Cassini Event Queue overflow detected." errors. +export FI_CXI_DEFAULT_CQ_SIZE=131072 + +#export NCCL_DEBUG=INFO +export PYTHONPATH=.:${PYTHONPATH} +export ROCM_PATH=/opt/rocm +export SINGULARITYENV_LD_LIBRARY_PATH=/usr/local/lib:/opt/cray/libfabric/1.15.2.0/lib64 + +# Try playing with max_split_size_mb if you run into OOM errors. 
+#export PYTORCH_HIP_ALLOC_CONF=max_split_size_mb:128 + +export HF_DATASETS_OFFLINE=1 + +export DATA_PATH=$FLASH_DIR/preprocessed/olmo-mix +export CHECKPOINTS_PATH=$FLASH_DIR/checkpoints +export EVAL_DATA_PATH=$SCRATCH_DIR/eval-data + +srun \ + --cpus-per-task=$SLURM_CPUS_PER_TASK \ + --distribution=block:block \ + --kill-on-bad-exit \ + scripts/run_with_environment.sh \ + singularity exec \ + -B"$PROJECT_DIR:$PROJECT_DIR" \ + -B"$FLASH_DIR:$FLASH_DIR" \ + -B"$SCRATCH_DIR:$SCRATCH_DIR" \ + -B /opt/cray:/opt/cray \ + -B /usr/lib64/libcxi.so.1:/usr/lib64/libcxi.so.1 \ + -B /usr/lib64/libjson-c.so.3:/usr/lib64/libjson-c.so.3 \ + $PROJECT_DIR/containers/$OLMO_CONTAINER \ + python scripts/train.py configs/olmo7-ablation.yaml ${@} \ + --run_name=${SLURM_JOB_ID} \ + --activation_checkpointing=fine_grained \ + --fsdp.wrapping_strategy=one_in_four \ + --fsdp.sharding_strategy=FULL_SHARD \ + --sharded_checkpointer=local \ + --time_limit=$((39 * 60 * 60)) \ + --wandb.group=$WANDB_GROUP diff --git a/scripts/prepare_memmap_dataset.py b/scripts/prepare_memmap_dataset.py index 7a802ff4c..f3b9aff31 100644 --- a/scripts/prepare_memmap_dataset.py +++ b/scripts/prepare_memmap_dataset.py @@ -374,6 +374,7 @@ def make_source_and_target( "--safe-mode/--fast-mode", default=False, help="Safe mode caches locally and decompresses using gzip.open" ) @click.option("-j", "--workers", "max_workers", type=int, default=1, help="Defaults to number of CPUs") +@click.option("--ack-deprecated", is_flag=True, help="Acknowledge that this command is deprecated") def main( src: Tuple[str, ...], output: str, @@ -389,7 +390,20 @@ def main( paths_per_worker: int = 1, max_workers: int = 1, cache_dir: Optional[str] = None, + ack_deprecated: bool = False, ): + print("WARNING: THIS SCRIPT IS DEPRECATED!!!") + print( + "Consider using the tokenization tool in the Dolma toolkit: " + "https://github.com/allenai/dolma/blob/main/docs/tokenize.md" + ) + + if not ack_deprecated: + continue_question = input("Do you want 
to continue? [y/N]: ") + if not (c := continue_question.lower().strip()) or c != "y": + print("Aborting.") + return + print("=== CONFIGURATION ===") print(f"src: {src}") print(f"output: {output}")