diff --git a/configs/mcli/v1-mix-medium-mitch-ish.yaml b/configs/mcli/v1-mix-medium-mitch-ish.yaml new file mode 100644 index 000000000..22fca520d --- /dev/null +++ b/configs/mcli/v1-mix-medium-mitch-ish.yaml @@ -0,0 +1,31 @@ +run_name: v1-mix-medium-mitch-ish +image: mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04 +gpu_num: 216 +cluster: r12z3 +gpu_type: a100_40gb +integrations: + - integration_type: git_repo + git_repo: allenai/LLM + git_branch: main # make sure to update this! + pip_install: -e . + ssh_clone: true +command: |- + pip freeze + mkdir -p /root/.cache/torch/ + + export OMP_NUM_THREADS=8 + export LOG_FILTER_TYPE=local_rank0_only + export OLMO_NO_SSL=1 # we get SSLErrors all the time on this cluster + #export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128 + + cd LLM + + torchrun \ + --master_addr $MASTER_ADDR \ + --master_port $MASTER_PORT \ + --nnodes $NUM_NODES \ + --node_rank $NODE_RANK \ + --nproc_per_node 8 \ + scripts/train.py configs/v1-mix-medium-mitch-ish-mcli.yaml \ + --run_name=v1-mix-mitch-ish \ + --global_train_batch_size=2160 diff --git a/configs/mcli/v1-mix-medium.yaml b/configs/mcli/v1-mix-medium.yaml index 19df035dc..a27a098d9 100644 --- a/configs/mcli/v1-mix-medium.yaml +++ b/configs/mcli/v1-mix-medium.yaml @@ -7,12 +7,19 @@ integrations: - integration_type: git_repo git_repo: allenai/LLM git_branch: main # make sure to update this! - pip_install: -e .[all] + pip_install: -e . ssh_clone: true command: |- + pip freeze + mkdir -p /root/.cache/torch/ + export OMP_NUM_THREADS=8 + export LOG_FILTER_TYPE=local_rank0_only + export OLMO_NO_SSL=1 # we get SSLErrors all the time on this cluster #export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128 + cd LLM + torchrun \ --master_addr $MASTER_ADDR \ --master_port $MASTER_PORT \ diff --git a/configs/mcli/v1_5-mix-medium-mitch-ish.yaml b/configs/mcli/v1_5-mix-medium-mitch-ish.yaml new file mode 100644 index 000000000..1b142966f --- /dev/null +++ b/configs/mcli/v1_5-mix-medium-mitch-ish.yaml @@ -0,0 +1,31 @@ +run_name: v1-5-mix-medium-mitch-ish # can't have "_" or "." here +image: mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04 +gpu_num: 216 +cluster: r12z3 +gpu_type: a100_40gb +integrations: + - integration_type: git_repo + git_repo: allenai/LLM + git_branch: main # make sure to update this! + pip_install: -e . + ssh_clone: true +command: |- + pip freeze + mkdir -p /root/.cache/torch/ + + export OMP_NUM_THREADS=8 + export LOG_FILTER_TYPE=local_rank0_only + export OLMO_NO_SSL=1 + #export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128 + + cd LLM + + torchrun \ + --master_addr $MASTER_ADDR \ + --master_port $MASTER_PORT \ + --nnodes $NUM_NODES \ + --node_rank $NODE_RANK \ + --nproc_per_node 8 \ + scripts/train.py configs/v1_5-mix-medium-mitch-ish-mcli.yaml \ + --run_name=v1_5-mix-mitch-ish \ + --global_train_batch_size=2160 diff --git a/configs/mcli/v1_5-mix-medium.yaml b/configs/mcli/v1_5-mix-medium.yaml index e6744b466..bbd51f828 100644 --- a/configs/mcli/v1_5-mix-medium.yaml +++ b/configs/mcli/v1_5-mix-medium.yaml @@ -7,12 +7,19 @@ integrations: - integration_type: git_repo git_repo: allenai/LLM git_branch: main # make sure to update this! - pip_install: -e .[all] + pip_install: -e . ssh_clone: true command: |- + pip freeze + mkdir -p /root/.cache/torch/ + export OMP_NUM_THREADS=8 + export LOG_FILTER_TYPE=local_rank0_only + export OLMO_NO_SSL=1 # we get SSLErrors all the time on this cluster #export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128 + cd LLM + torchrun \ --master_addr $MASTER_ADDR \ --master_port $MASTER_PORT \ diff --git a/configs/v1-mix-medium-mitch-ish-mcli.yaml b/configs/v1-mix-medium-mitch-ish-mcli.yaml new file mode 100644 index 000000000..6469cb04d --- /dev/null +++ b/configs/v1-mix-medium-mitch-ish-mcli.yaml @@ -0,0 +1,4420 @@ +run_name: v1-mix-medium-mitch-ish +seed: 6198 +dry_run: false + +wandb: + name: ${run_name} + project: olmo-medium + group: v1-mix + +model: + d_model: 4096 + n_heads: 32 + n_layers: 32 + # mlp_ratio: 6 + mlp_hidden_size: 22016 + weight_tying: false + alibi: false + rope: true + flash_attention: true + attention_dropout: 0.0 + attention_layer_norm: false + multi_query_attention: false + include_bias: false + block_type: sequential + layer_norm_type: default + layer_norm_with_affine: false + bias_for_layer_norm: false + attention_layer_norm_with_affine: false + activation_type: swiglu + residual_dropout: 0.0 + embedding_dropout: 0.0 + max_sequence_length: 2048 + vocab_size: 50280 + embedding_size: 50304 + eos_token_id: 0 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + +compile: + fullgraph: false + +optimizer: + name: adamw + learning_rate: 3.0e-4 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + metrics_log_interval: 10 + +scheduler: + name: cosine_with_warmup + t_warmup: 2000 + alpha_f: 0.1 + +tokenizer: + identifier: tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json + truncate_direction: right + +save_folder: runs/${run_name} +remote_save_folder: s3://ai2-llm/checkpoints/7b/${run_name} +save_overwrite: true +# Sharded checkpoints (best for restarts) +save_interval: 1000 +save_num_checkpoints_to_keep: -1 +# Unsharded checkpoints (for final storage) +save_interval_unsharded: null # getting errors on LUMI right now +save_num_unsharded_checkpoints_to_keep: -1 + +load_path: null + +max_duration: 476837 # 2T tokens +global_train_batch_size: 2048 +device_train_microbatch_size: 2 + +precision: amp_bf16 + +fsdp: + wrapping_strategy: by_block + precision: mixed + +max_grad_norm: 1.0 +max_grad_norm_ratio: null + +speed_monitor: + window_size: 20 + +eval_interval: ${save_interval} +eval_subset_num_batches: -1 +device_eval_batch_size: ${device_train_microbatch_size} +evaluators: + - label: all-small-ppl-validation + data: + num_workers: 0 + drop_last: true + # pin_memory: true + # prefetch_factor: 1 + # persistent_workers: false + # timeout: 0 + datasets: + 4chan-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/4chan/val.npy + c4_100_domains-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_100_domains/val.npy + c4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_en/val.npy + gab-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/gab/val.npy + ice-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ice/val.npy + m2d2_s2orc-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_s2orc/val.npy + m2d2_wiki-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_wiki/val.npy + manosphere-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/manosphere/val.npy + mc4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/mc4_en/val.npy + pile-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/pile/val.npy + ptb-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ptb/val.npy + twitterAEE-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/twitterAEE/val.npy + wikitext_103-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/wikitext_103/val.npy + + ########################## + # Downstream evaluations # + ########################## + - label: piqa + type: downstream + + - label: hellaswag + type: downstream + + - label: winogrande + type: downstream + + - label: openbook_qa + type: downstream + + # - label: boolq # requires implemention of the pmi_dc matrix + # type: downstream + + - label: sciq + type: downstream + + - label: arc_easy + type: downstream + + # - label: arc_challenge # requires implemention of the pmi_dc matrix + # type: downstream + + - label: copa + type: downstream + + - label: rte + type: downstream + + - label: commitment_bank + type: downstream + + - label: mrpc + type: downstream + + - label: sst2 + type: downstream + +data: + pad_direction: right + num_workers: 16 + drop_last: true + pin_memory: true + prefetch_factor: 1 + persistent_workers: true + timeout: 0 + paths: + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/books/0_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/books/0_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/books/0_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/books/0_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/books/1_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/books/1_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/books/2_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/books/2_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/books/2_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/books/2_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/00_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/00_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/00_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/00_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/01_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/01_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/01_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/01_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/02_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/02_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/02_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/02_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/03_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/03_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/03_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/03_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/04_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/04_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/04_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/04_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/05_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/05_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/05_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/05_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/06_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/06_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/06_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/06_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/07_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/07_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/07_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/07_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/08_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/08_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/08_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/08_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/09_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/09_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/09_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/09_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/10_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/10_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/10_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/10_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/11_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/11_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/11_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/11_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/12_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/12_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/12_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/12_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/13_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/13_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/13_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/13_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/14_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/14_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/14_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/14_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/15_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/15_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/15_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/15_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/16_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/16_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/16_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/16_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/17_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/17_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/17_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/17_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/18_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/18_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/18_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/18_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/19_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/19_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/19_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/19_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/20_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/20_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/20_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/20_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/21_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/21_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/21_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/21_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/22_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/22_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/22_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/22_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/23_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/23_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/23_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/23_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/24_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/24_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/24_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/24_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/25_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/25_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/25_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/25_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/26_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/26_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/26_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/26_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/27_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/27_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/27_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/27_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/28_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/28_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/28_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/28_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/29_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/29_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/29_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/29_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/30_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/30_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/30_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/30_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/31_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/31_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/31_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/31_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/32_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/32_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/32_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/32_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/33_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/33_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/33_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/33_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/34_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/34_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/34_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/34_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/35_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/35_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/35_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/35_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/36_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/36_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/36_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/36_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/37_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/37_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/37_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/37_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/38_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/38_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/38_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/38_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/39_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/39_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/39_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/39_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/40_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/40_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/40_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/40_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/41_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/41_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/41_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/41_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/42_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/42_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/42_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/42_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/43_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/43_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/43_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/43_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/44_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/44_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/44_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/44_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/45_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/45_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/45_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/45_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/46_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/46_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/46_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/46_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/47_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/47_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/47_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/47_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/48_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/48_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/48_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/48_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/49_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/49_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/49_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/49_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/50_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/50_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/50_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/50_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/51_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/51_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/51_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/51_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/52_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/52_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/52_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/52_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/53_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/53_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/53_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/53_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/54_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/54_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/54_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/54_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/55_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/55_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/55_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/55_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/56_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/56_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/56_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/56_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/57_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/57_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/57_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/57_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/58_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/58_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/58_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/58_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/59_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/59_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/59_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/59_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/60_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/60_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/60_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/60_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/61_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/61_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/61_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/61_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/62_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/62_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/62_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/62_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/63_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/63_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/63_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/63_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/64_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/64_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/64_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/64_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/65_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/65_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/65_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/65_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/66_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/66_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/66_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/66_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/67_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/67_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/67_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/67_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/68_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/69_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/69_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/69_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/69_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/70_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/70_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/70_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/70_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/71_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/71_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/71_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/71_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/72_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/72_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/72_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/72_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/73_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/73_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/73_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/73_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/74_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/74_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/74_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/74_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/75_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/75_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/75_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/75_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/76_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/76_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/76_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/76_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/77_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/77_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/77_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/77_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/78_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/78_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/78_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/78_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/79_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/79_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/79_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/79_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/80_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/80_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/80_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/80_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/81_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/81_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/81_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/81_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/82_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/82_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/82_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/82_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/83_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/83_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/83_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/83_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/84_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/84_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/84_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/84_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/85_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/85_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/85_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/85_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/000_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/000_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/001_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/001_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/002_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/002_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/003_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/003_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/004_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/005_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/005_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/006_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/006_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/007_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/007_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/008_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/008_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/009_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/009_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/010_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/010_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/011_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/011_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/012_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/012_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/013_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/014_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/014_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/015_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/015_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/016_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/016_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/017_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/017_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/018_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/018_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/019_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/019_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/020_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/020_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/021_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/021_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/022_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/022_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/023_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/023_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/024_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/024_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/025_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/025_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/026_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/027_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/027_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/028_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/028_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/029_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/029_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/030_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/030_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/031_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/031_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/032_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/032_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/033_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/033_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/034_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/034_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/035_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/035_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/036_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/036_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/037_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/038_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/038_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/039_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/039_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/040_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/040_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/041_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/041_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/042_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/042_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/043_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/043_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/044_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/044_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/045_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/045_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/046_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/046_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/047_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/047_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/048_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/048_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/049_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/049_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/050_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/050_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/051_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/052_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/052_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/053_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/053_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/054_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/054_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/055_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/056_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/056_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/057_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/057_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/058_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/058_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/059_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/059_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/060_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/060_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/061_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/061_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/062_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/062_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/063_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/063_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/064_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/064_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/065_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/065_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/066_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/066_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/067_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/067_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/068_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/068_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/069_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/069_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/070_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/070_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/071_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/071_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/072_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/072_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/073_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/073_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/074_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/074_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/075_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/075_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/076_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/076_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/077_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/077_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/078_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/078_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/079_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/079_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/080_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/080_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/081_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/081_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/082_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/082_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/083_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/083_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/084_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/084_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/085_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/086_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/087_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/087_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/088_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/089_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/089_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/090_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/090_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/091_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/091_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/092_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/092_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/093_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/093_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/094_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/094_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/095_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/095_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/096_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/096_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/097_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/098_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/098_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/099_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/099_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/100_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/100_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/101_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/101_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/102_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/103_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/103_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/104_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/104_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/105_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/105_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/106_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/106_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/107_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/107_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/108_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/108_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/109_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/109_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/110_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/110_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/111_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/111_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/112_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/112_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/113_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/113_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/114_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/114_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/115_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/115_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/116_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/116_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/117_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/117_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/118_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/118_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/119_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/119_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/120_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/120_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/121_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/121_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/122_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/122_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/123_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/123_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/124_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/124_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/125_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/125_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/126_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/126_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/127_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/127_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/128_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/129_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/129_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/130_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/130_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/131_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/131_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/132_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/132_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/133_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/133_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/134_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/134_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/135_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/135_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/136_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/136_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/137_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/137_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/138_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/138_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/139_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/139_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/140_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/140_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/141_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/141_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/142_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/142_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/143_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/143_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/144_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/144_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/145_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/145_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/146_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/146_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/147_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/147_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/148_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/148_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/149_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/149_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/150_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/150_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/151_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/151_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/152_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/152_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/153_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/153_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/154_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/154_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/155_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/155_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/156_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/156_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/157_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/157_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/158_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/158_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/159_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/159_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/160_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/160_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/161_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/161_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/162_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/162_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/163_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/163_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/164_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/164_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/165_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/165_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/166_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/166_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/167_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/167_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/168_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/168_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/169_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/169_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/170_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/170_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/171_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/171_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/172_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/172_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/173_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/173_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/174_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/174_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/175_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/175_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/176_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/176_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/177_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/177_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/178_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/178_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/179_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/179_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/180_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/180_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/181_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/181_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/182_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/182_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/183_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/183_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/184_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/184_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/185_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/185_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/186_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/186_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/187_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/187_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/188_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/188_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/189_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/189_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/190_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/190_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/191_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/191_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/192_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/192_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/193_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/193_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/194_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/194_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/195_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/195_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/196_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/196_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/197_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/197_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/198_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/198_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/199_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/199_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/200_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/200_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/201_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/201_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/202_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/202_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/203_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/203_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/204_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/204_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/205_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/205_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/206_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/206_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/207_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/207_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/208_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/208_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/209_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/209_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/210_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/210_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/211_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/211_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/212_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/212_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/213_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/213_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/214_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/214_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/215_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/215_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/216_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/216_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/217_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/217_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/218_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/219_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/219_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/220_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/220_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/221_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/221_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/222_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/222_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/223_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/223_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/224_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/224_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/225_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/225_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/226_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/226_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/227_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/227_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/228_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/228_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/229_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/229_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/230_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/230_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/231_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/231_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/232_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/232_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/233_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/233_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/234_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/234_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/235_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/235_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/236_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/236_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/237_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/237_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/238_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/238_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/239_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/239_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/240_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/240_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/241_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/241_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/242_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/242_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/243_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/243_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/244_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/244_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/245_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/245_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/246_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/246_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/247_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/247_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/248_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/248_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/249_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/249_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/250_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/250_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/251_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/251_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/252_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/252_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/253_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/253_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/254_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/254_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/255_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/255_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/256_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/256_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/257_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/257_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/258_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/258_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/259_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/259_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/260_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/260_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/261_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/261_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/262_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/262_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/263_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/263_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/264_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/264_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/265_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/265_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/266_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/266_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/267_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/267_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/268_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/268_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/269_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/269_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/270_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/270_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/271_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/271_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/272_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/272_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/273_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/273_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/274_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/274_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/275_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/276_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/276_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/277_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/277_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/278_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/278_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/279_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/279_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/280_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/280_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/281_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/281_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/282_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/282_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/283_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/283_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/284_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/284_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/285_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/285_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/286_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/286_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/287_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/287_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/288_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/288_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/289_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/289_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/290_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/290_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/291_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/291_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/292_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/292_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/293_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/293_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/294_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/294_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/295_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/295_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/296_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/296_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/297_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/297_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/298_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/298_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/299_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/299_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/300_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/300_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/301_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/301_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/302_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/302_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/303_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/303_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/304_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/304_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/305_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/305_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/306_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/306_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/307_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/307_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/308_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/308_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/309_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/309_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/310_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/310_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/311_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/311_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/312_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/312_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/313_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/313_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/314_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/314_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/315_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/315_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/316_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/316_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/317_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/317_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/318_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/318_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/319_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/319_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/320_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/320_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/321_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/321_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/322_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/322_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/323_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/323_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/324_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/324_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/325_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/325_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/326_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/326_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/327_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/327_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/328_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/328_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/329_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/329_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/330_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/330_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/331_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/331_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/332_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/332_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/333_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/333_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/334_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/334_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/335_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/335_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/336_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/336_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/337_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/337_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/338_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/338_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/339_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/339_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/340_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/340_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/341_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/341_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/342_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/342_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/343_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/343_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/344_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/344_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/345_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/345_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/346_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/346_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/347_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/347_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/348_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/348_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/349_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/349_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/350_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/350_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/351_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/351_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/352_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/352_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/353_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/353_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/354_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/354_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/355_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/355_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/356_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/356_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/357_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/357_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/358_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/358_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/359_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/359_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/360_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/360_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/361_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/361_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/362_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/362_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/363_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/363_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/364_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/364_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/365_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/365_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/366_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/366_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/367_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/367_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/368_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/368_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/369_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/369_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/370_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/370_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/371_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/371_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/372_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/372_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/373_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/373_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/374_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/374_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/375_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/375_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/376_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/376_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/377_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/377_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/378_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/378_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/379_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/379_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/380_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/380_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/381_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/381_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/382_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/382_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/383_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/383_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/384_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/384_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/385_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/385_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/386_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/386_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/387_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/387_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/388_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/388_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/389_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/389_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/390_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/390_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/391_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/391_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/392_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/392_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/393_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/393_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/394_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/394_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/395_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/395_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/396_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/396_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/397_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/397_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/398_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/398_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/399_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/399_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/400_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/400_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/401_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/401_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/402_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/402_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/403_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/403_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/404_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/404_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/405_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/405_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/406_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/406_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/407_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/407_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/408_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/408_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/409_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/409_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/410_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/410_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/411_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/411_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/412_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/412_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/413_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/413_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/414_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/414_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/415_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/415_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/416_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/416_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/417_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/417_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/418_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/418_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/419_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/419_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/420_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/420_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/421_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/421_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/422_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/422_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/423_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/423_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/424_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/424_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/425_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/425_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/426_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/426_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/427_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/427_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/428_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/428_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/429_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/429_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/430_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/430_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/431_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/431_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/432_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/432_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/433_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/433_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/434_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/434_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/435_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/435_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/436_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/436_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/437_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/437_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/438_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/438_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/439_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/439_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/440_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/440_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/441_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/441_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/442_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/442_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/443_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/443_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/444_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/444_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/445_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/445_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/446_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/446_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/447_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/447_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/448_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/448_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/449_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/449_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/450_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/450_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/451_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/451_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/452_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/452_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/453_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/453_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/454_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/454_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/455_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/455_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/456_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/457_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/457_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/458_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/459_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/459_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/460_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/460_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/461_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/461_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/462_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/462_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/463_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/463_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/464_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/464_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/465_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/465_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/466_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/466_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/467_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/467_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/468_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/468_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/469_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/469_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/470_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/470_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/471_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/471_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/472_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/472_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/473_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/473_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/474_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/474_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/475_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/475_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/476_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/476_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/477_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/477_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/478_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/479_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/479_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/480_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/480_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/481_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/481_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/482_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/482_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/483_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/483_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/484_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/484_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/485_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/486_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_head/486_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/000_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/000_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/001_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/001_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/002_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/002_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/003_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/003_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/004_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/004_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/005_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/005_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/006_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/006_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/007_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/007_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/008_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/008_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/009_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/009_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/010_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/010_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/011_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/011_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/012_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/012_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/013_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/013_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/014_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/014_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/015_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/015_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/016_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/016_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/017_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/017_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/018_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/018_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/019_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/019_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/020_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/020_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/021_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/021_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/022_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/022_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/023_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/023_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/024_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/024_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/025_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/025_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/026_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/026_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/027_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/027_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/028_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/028_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/029_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/029_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/030_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/030_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/031_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/031_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/032_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/032_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/033_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/033_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/034_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/034_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/035_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/035_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/036_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/036_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/037_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/037_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/038_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/039_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/039_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/040_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/040_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/041_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/041_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/042_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/042_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/043_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/043_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/044_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/044_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/045_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/045_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/046_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/046_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/047_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/047_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/048_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/048_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/049_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/049_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/050_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/050_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/051_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/051_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/052_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/052_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/053_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/053_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/054_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/054_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/055_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/055_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/056_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/056_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/057_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/057_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/058_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/058_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/059_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/059_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/060_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/060_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/061_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/061_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/062_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/062_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/063_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/063_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/064_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/064_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/065_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/065_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/066_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/066_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/067_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/067_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/068_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/068_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/069_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/069_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/070_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/070_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/071_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/072_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/072_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/073_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/073_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/074_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/074_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/075_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/075_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/076_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/076_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/077_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/077_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/078_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/078_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/079_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/079_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/080_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/080_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/081_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/081_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/082_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/082_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/083_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/083_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/084_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/084_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/085_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/085_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/086_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/086_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/087_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/087_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/088_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/088_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/089_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/089_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/090_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/090_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/091_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/091_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/092_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/092_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/093_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/093_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/094_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/094_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/095_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/095_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/096_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/096_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/097_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/097_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/098_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/098_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/099_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/099_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/100_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/100_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/101_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/101_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/102_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/102_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/103_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/103_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/104_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/104_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/105_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/105_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/106_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/106_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/107_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/107_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/108_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/108_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/109_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/109_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/110_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/110_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/111_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/111_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/112_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/112_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/113_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/113_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/114_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/114_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/115_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/115_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/116_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/116_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/117_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/117_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/118_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/118_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/119_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/120_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/120_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/121_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/121_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/122_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/122_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/123_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/123_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/124_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/124_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/125_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/125_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/126_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/126_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/127_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/127_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/128_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/128_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/129_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/129_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/130_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/130_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/131_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/131_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/132_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/132_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/133_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/133_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/134_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/134_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/135_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/135_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/136_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/136_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/137_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/137_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/138_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/138_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/139_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/139_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/140_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/140_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/141_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/141_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/142_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/142_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/143_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/143_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/144_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/144_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/145_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/145_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/146_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/146_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/147_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/147_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/148_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/148_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/149_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/149_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/150_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/150_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/151_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/151_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/152_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/152_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/153_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/153_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/154_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/154_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/155_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/155_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/156_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/156_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/157_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/157_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/158_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/158_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/159_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/159_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/160_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/160_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/161_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/161_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/162_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/162_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/163_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/163_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/164_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/164_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/165_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/165_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/166_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/166_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/167_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/167_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/168_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/168_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/169_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/169_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/170_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/170_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/171_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/171_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/172_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/172_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/173_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/173_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/174_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/174_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/175_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/175_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/176_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/176_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/177_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/177_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/178_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/178_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/179_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/179_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/180_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/180_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/181_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/181_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/182_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/182_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/183_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/183_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/184_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/184_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/185_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/185_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/186_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/186_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/187_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/187_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/188_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/188_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/189_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/189_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/190_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/190_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/191_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/191_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/192_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/192_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/193_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/193_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/194_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/194_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/195_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/195_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/196_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/196_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/197_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/197_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/198_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/198_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/199_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/199_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/200_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/200_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/201_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/201_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/202_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/202_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/203_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/203_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/204_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/204_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/205_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/205_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/206_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/206_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/207_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/207_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/208_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/208_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/209_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/209_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/210_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/210_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/211_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/211_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/212_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/212_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/213_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/213_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/214_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/214_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/215_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/215_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/216_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/216_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/217_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/217_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/218_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/218_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/219_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/219_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/220_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/220_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/221_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/221_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/222_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/222_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/223_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/223_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/224_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/224_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/225_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/225_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/226_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/226_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/227_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/227_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/228_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/228_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/229_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/229_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/230_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/230_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/231_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/232_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/232_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/233_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/233_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/234_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/234_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/235_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/235_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/236_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/236_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/237_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/237_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/238_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/238_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/239_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/239_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/240_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/240_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/241_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/242_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/242_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/243_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/243_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/244_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/244_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/245_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/245_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/246_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/246_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/247_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/247_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/248_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/248_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/249_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/249_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/250_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/250_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/251_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/251_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/252_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/252_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/253_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/253_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/254_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/254_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/255_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/256_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/256_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/257_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/257_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/258_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/258_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/259_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/259_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/260_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/260_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/261_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/261_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/262_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/262_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/263_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/263_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/264_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/264_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/265_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/265_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/266_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/266_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/267_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/267_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/268_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/268_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/269_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/269_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/270_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/270_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/271_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/271_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/272_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/272_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/273_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/273_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/274_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/274_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/275_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/275_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/276_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/276_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/277_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/277_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/278_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/278_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/279_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/279_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/280_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/280_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/281_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/281_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/282_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/282_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/283_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/283_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/284_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/284_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/285_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/285_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/286_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/286_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/287_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/287_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/288_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/288_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/289_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/289_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/290_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/290_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/291_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/292_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/292_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/293_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/294_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/294_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/295_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/295_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/296_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/296_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/297_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/297_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/298_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/298_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/299_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/299_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/300_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/300_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/301_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/301_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/302_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/302_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/303_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/303_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/304_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/304_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/305_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/305_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/306_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/306_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/307_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/307_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/308_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/309_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/309_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/310_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/310_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/311_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/311_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/312_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/312_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/313_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/313_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/314_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/314_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/315_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/315_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/316_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/316_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/317_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/318_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/318_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/319_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/319_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/320_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/320_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/321_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/322_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/322_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/323_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/323_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/324_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/324_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/325_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/325_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/326_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/326_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/327_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/327_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/328_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/328_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/329_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/329_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/330_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/330_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/331_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/331_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/332_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/332_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/333_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/333_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/334_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/334_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/335_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/335_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/336_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/336_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/337_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/337_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/338_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/338_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/339_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/339_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/340_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/340_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/341_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/341_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/342_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/342_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/343_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/343_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/344_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/344_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/345_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/345_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/346_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/346_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/347_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/347_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/348_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/348_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/349_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/349_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/350_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/350_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/351_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/351_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/352_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/352_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/353_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/354_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/354_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/355_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/355_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/356_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/356_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/357_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/357_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/358_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/358_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/359_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/359_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/360_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/360_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/361_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/361_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/362_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/362_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/363_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/363_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/364_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/364_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/365_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/365_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/366_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/366_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/367_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/367_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/368_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/368_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/369_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/369_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/370_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/370_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/371_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/371_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/372_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/372_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/373_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/373_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/374_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/374_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/375_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/375_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/376_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/376_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/377_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/377_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/378_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/378_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/379_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/379_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/380_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/380_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/381_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/381_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/382_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/382_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/383_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/383_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/384_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/384_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/385_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/385_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/386_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/386_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/387_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/387_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/388_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/388_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/389_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/389_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/390_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/390_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/391_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/391_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/392_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/392_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/393_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/393_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/394_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/394_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/395_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/395_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/396_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/396_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/397_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/397_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/398_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/398_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/399_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/399_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/400_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/400_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/401_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/401_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/402_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/402_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/403_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/403_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/404_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/404_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/405_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/405_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/406_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/406_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/407_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/407_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/408_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/408_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/409_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/409_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/410_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/410_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/411_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/411_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/412_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/412_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/413_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/413_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/414_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/414_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/415_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/415_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/416_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/416_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/417_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/417_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/418_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/418_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/419_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/419_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/420_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/420_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/421_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/421_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/422_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/422_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/423_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/423_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/424_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/424_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/425_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/425_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/426_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/426_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/427_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/427_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/428_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/428_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/429_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/429_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/430_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/430_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/431_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/431_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/432_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/432_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/433_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/433_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/434_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/434_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/435_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/435_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/436_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/436_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/437_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/437_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/438_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/438_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/439_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/439_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/440_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/440_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/441_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/441_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/442_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/442_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/443_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/443_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/444_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/444_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/445_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/445_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/446_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/446_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/447_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/447_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/448_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/448_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/449_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/450_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/450_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/451_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/451_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/452_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/452_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/453_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/453_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/454_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/454_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/455_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/455_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/456_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/456_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/457_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/457_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/458_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/459_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/459_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/460_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/460_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/461_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/461_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/462_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/462_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/463_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/463_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/464_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/464_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/465_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/466_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/466_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/467_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/467_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/468_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/468_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/469_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/469_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/470_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/470_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/471_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/471_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/472_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/472_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/473_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/473_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/474_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/474_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/475_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/475_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/476_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/476_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/477_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/477_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/478_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/478_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/479_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/479_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/480_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/481_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/481_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/482_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/482_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/483_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/483_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/484_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/484_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/485_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/485_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/486_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/486_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/487_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/487_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/488_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/488_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/489_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/489_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/490_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/490_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/491_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/491_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/492_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/492_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/493_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/493_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/494_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/494_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/495_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/495_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/496_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/496_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/497_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/497_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/498_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/498_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/499_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/499_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/500_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/500_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/501_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/501_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/502_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/502_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/503_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/503_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/504_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/504_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/505_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/505_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/506_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/506_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/507_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/507_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/508_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/508_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/509_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/509_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/510_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/510_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/511_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/511_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/512_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/512_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/513_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/513_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/514_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/514_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/515_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/515_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/516_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/516_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/517_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/517_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/518_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/518_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/519_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/519_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/520_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/520_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/521_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/521_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/522_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/522_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/523_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/523_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/524_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/524_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/525_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/525_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/526_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/527_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/527_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/528_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/528_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/529_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/529_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/530_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/530_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/531_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/531_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/532_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/532_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/533_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/533_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/534_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/534_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/535_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/535_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/536_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/536_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/537_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/537_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/538_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/538_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/539_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/539_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/540_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/540_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/541_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_middle/541_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/000_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/000_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/001_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/001_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/002_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/002_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/003_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/003_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/004_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/004_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/005_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/005_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/006_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/006_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/007_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/007_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/008_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/008_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/009_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/009_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/010_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/010_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/011_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/011_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/012_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/012_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/013_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/013_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/014_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/014_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/015_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/015_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/016_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/016_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/017_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/017_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/018_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/018_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/019_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/019_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/020_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/020_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/021_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/021_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/022_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/022_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/023_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/023_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/024_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/024_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/025_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/025_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/026_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/026_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/027_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/027_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/028_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/028_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/029_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/029_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/030_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/030_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/031_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/031_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/032_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/032_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/033_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/033_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/034_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/034_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/035_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/035_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/036_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/036_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/037_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/037_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/038_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/038_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/039_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/039_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/040_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/040_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/041_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/041_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/042_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/042_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/043_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/043_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/044_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/044_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/045_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/045_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/046_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/046_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/047_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/047_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/048_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/048_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/049_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/049_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/050_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/050_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/051_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/051_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/052_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/052_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/053_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/053_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/054_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/054_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/055_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/055_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/056_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/056_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/057_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/057_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/058_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/058_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/059_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/059_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/060_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/060_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/061_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/061_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/062_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/062_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/063_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/063_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/064_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/064_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/065_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/065_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/066_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/066_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/067_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/067_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/068_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/068_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/069_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/069_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/070_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/070_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/071_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/071_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/072_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/072_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/073_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/073_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/074_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/074_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/075_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/075_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/076_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/076_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/077_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/077_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/078_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/078_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/079_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/079_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/080_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/080_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/081_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/081_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/082_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/082_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/083_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/083_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/084_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/084_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/085_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/085_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/086_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/086_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/087_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/087_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/088_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/088_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/089_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/089_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/090_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/090_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/091_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/091_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/092_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/092_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/093_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/093_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/094_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/094_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/095_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/095_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/096_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/096_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/097_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/097_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/098_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/098_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/099_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/099_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/100_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/100_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/101_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/101_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/102_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/102_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/103_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/103_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/104_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/104_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/105_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/105_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/106_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/106_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/107_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/107_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/108_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/108_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/109_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/109_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/110_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/110_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/111_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/111_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/112_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/112_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/113_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/113_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/114_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/114_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/115_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/115_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/116_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/116_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/117_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/117_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/118_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/118_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/119_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/119_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/120_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/120_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/121_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/121_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/122_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/122_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/123_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/123_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/124_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/124_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/125_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/125_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/126_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/126_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/127_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/127_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/128_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/128_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/129_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/129_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/130_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/130_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/131_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/131_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/132_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/132_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/133_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/133_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/134_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/134_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/135_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/135_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/136_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/136_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/137_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/137_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/138_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/138_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/139_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/139_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/140_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/140_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/141_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/141_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/142_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/142_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/143_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/143_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/144_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/144_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/145_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/145_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/146_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/146_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/147_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/147_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/148_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/148_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/149_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/149_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/150_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/150_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/151_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/151_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/152_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/152_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/153_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/153_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/154_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/154_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/155_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/155_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/156_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/156_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/157_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/157_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/158_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/158_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/159_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/159_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/160_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/160_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/161_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/161_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/162_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/162_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/163_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/163_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/164_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/164_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/165_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/165_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/166_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/166_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/167_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/167_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/168_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/168_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/169_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/169_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/170_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/170_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/171_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/171_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/172_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/172_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/173_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/173_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/174_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/174_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/175_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/175_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/176_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/176_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/177_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/177_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/178_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/178_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/179_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/179_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/180_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/180_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/181_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/181_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/182_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/182_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/183_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/183_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/184_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/184_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/185_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/185_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/186_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/186_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/187_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/187_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/188_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/188_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/189_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/189_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/190_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/190_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/191_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/191_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/192_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/192_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/193_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/193_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/194_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/194_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/195_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/195_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/196_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/196_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/197_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/197_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/198_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/198_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/199_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/199_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/200_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/200_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/201_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/201_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/202_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/202_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/203_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/203_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/204_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/204_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/205_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/205_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/206_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/206_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/207_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/207_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/208_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/208_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/209_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/209_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/210_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/210_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/211_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/211_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/212_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/212_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/213_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/213_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/214_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/214_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/215_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/215_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/216_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/216_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/217_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/217_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/218_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/218_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/219_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/219_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/220_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/220_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/221_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/221_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/222_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/222_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/223_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/223_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/224_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/224_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/225_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/225_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/226_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/226_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/227_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/227_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/228_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/228_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/229_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/229_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/230_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/230_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/231_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/231_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/232_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/232_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/233_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/233_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/234_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/234_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/235_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/235_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/236_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/236_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/237_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/237_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/238_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/238_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/239_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/239_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/240_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/240_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/241_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/241_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/242_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/242_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/243_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/243_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/244_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/244_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/245_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/245_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/246_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/246_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/247_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/247_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/248_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/248_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/249_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/249_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/250_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/250_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/251_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/251_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/252_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/252_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/253_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/253_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/254_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/254_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/255_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/255_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/256_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/256_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/257_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/257_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/258_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/258_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/259_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/259_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/260_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/260_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/261_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/261_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/262_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/262_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/263_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/263_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/264_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/264_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/265_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/265_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/266_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/266_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/267_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/267_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/268_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/268_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/269_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/269_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/270_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/270_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/271_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/271_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/272_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/272_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/273_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/273_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/274_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/274_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/275_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/275_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/276_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/276_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/277_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/277_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/278_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/278_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/279_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/279_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/280_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/280_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/281_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/281_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/282_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/282_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/283_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/283_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/284_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/284_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/285_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/285_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/286_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/286_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/287_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/287_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/288_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/288_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/289_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/289_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/290_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/290_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/291_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/291_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/292_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/292_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/293_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/293_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/294_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/294_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/295_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/295_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/296_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/296_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/297_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/297_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/298_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/298_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/299_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/299_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/300_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/300_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/301_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/301_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/302_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/302_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/303_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/303_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/304_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/304_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/305_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/305_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/306_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/306_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/307_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/307_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/308_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/308_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/309_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/309_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/310_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/310_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/311_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/311_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/312_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/312_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/313_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/313_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/314_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/314_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/315_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/315_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/316_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/316_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/317_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/317_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/318_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/318_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/319_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/319_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/320_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/320_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/321_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/321_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/322_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/322_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/323_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/323_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/324_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/324_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/325_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/325_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/326_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/326_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/327_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/327_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/328_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/328_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/329_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/329_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/330_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/330_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/331_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/331_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/332_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/332_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/333_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/333_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/334_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/334_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/335_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/335_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/336_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/336_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/337_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/337_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/338_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/338_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/339_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/339_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/340_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/341_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/341_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/342_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/342_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/343_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/343_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/344_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/344_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/345_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/345_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/346_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/346_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/347_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/347_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/348_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/348_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/349_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/349_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/350_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/350_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/351_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/351_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/352_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/352_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/353_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/353_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/354_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/354_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/355_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/355_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/356_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/356_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/357_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/357_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/358_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/358_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/359_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/359_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/360_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/360_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/361_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/361_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/362_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/362_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/363_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/363_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/364_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/364_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/365_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/365_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/366_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/366_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/367_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/367_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/368_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/368_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/369_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/369_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/370_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/370_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/371_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/371_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/372_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/372_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/373_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/373_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/374_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/374_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/375_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/375_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/376_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/376_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/377_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/377_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/378_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/378_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/379_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/379_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/380_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/380_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/381_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/381_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/382_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/382_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/383_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/383_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/384_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/384_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/385_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/385_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/386_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/386_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/387_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/387_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/388_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/388_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/389_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/389_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/390_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/390_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/391_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/391_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/392_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/392_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/393_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/393_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/394_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/394_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/395_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/395_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/396_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/396_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/397_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/397_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/398_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/398_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/399_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/399_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/400_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/400_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/401_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/401_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/402_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/402_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/403_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/403_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/404_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/404_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/405_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/405_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/406_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/406_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/407_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/407_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/408_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/408_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/409_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/409_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/410_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/410_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/411_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/411_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/412_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/412_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/413_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/413_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/414_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/414_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/415_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/415_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/416_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/416_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/417_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/417_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/418_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/418_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/419_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/419_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/420_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/420_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/421_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/421_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/422_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/422_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/423_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/423_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/424_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/424_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/425_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/425_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/426_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/426_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/427_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/427_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/428_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/428_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/429_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/429_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/430_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/430_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/431_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/431_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/432_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/432_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/433_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/433_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/434_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/434_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/435_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/435_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/436_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/436_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/437_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/437_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/438_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/438_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/439_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/439_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/440_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/440_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/441_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/441_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/442_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/442_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/443_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/443_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/444_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/444_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/445_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/445_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/446_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/446_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/447_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/447_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/448_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/448_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/449_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/449_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/450_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/450_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/451_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/451_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/452_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/452_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/453_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/453_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/454_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/454_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/455_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/455_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/456_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/456_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/457_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/457_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/458_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/458_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/459_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/459_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/460_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/460_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/461_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/461_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/462_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/462_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/463_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/463_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/464_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/464_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/465_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/465_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/466_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/466_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/467_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/467_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/468_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/468_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/469_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/469_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/470_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/470_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/471_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/471_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/472_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/472_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/473_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/473_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/474_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/474_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/475_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/475_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/476_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/476_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/477_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/477_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/478_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/478_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/479_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/479_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/480_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/480_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/481_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/481_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/482_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/482_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/483_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/483_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/484_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/484_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/485_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/485_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/486_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/486_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/487_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/487_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/488_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/488_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/489_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/489_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/490_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/490_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/491_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/491_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/492_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/492_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/493_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/493_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/494_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/494_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/495_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/495_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/496_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/496_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/497_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/497_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/498_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/498_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/499_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/499_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/500_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/500_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/501_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/501_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/502_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/502_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/503_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/503_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/504_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/504_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/505_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/505_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/506_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/506_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/507_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/507_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/508_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/508_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/509_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/509_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/510_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/510_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/511_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/511_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/512_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/512_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/513_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/513_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/514_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/514_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/515_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/515_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/516_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/516_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/517_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/517_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/518_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/518_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/519_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/519_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/520_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/520_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/521_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/521_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/522_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/522_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/523_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/523_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/524_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/524_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/525_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/525_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/526_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/cc_en_tail/526_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/00_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/00_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/00_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/01_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/01_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/01_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/02_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/02_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/02_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/03_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/03_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/03_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/04_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/04_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/04_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/05_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/05_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/05_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/06_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/06_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/06_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/07_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/07_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/08_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/08_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/08_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/09_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/09_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/09_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/10_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/10_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/10_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/11_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/11_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/11_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/12_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/12_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/12_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/13_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/13_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/14_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/14_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/14_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/15_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/15_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/16_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/16_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/16_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/17_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/17_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/18_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/18_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/18_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/19_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/19_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/19_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/20_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/20_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/20_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/21_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/21_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/22_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/22_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/22_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/23_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/23_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/24_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/24_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/24_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/25_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/25_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/25_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/26_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/26_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/27_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/27_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/27_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/28_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/28_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/29_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/29_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/29_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/30_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/30_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/30_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/31_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/31_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/31_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/32_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/32_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/32_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/33_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/33_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/33_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/34_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/34_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/34_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/35_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/35_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/35_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/36_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/36_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/36_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/37_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/37_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/37_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/38_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/38_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/39_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/39_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/39_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/40_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/40_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/40_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/41_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/41_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/41_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/000_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/000_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/000_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/000_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/001_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/001_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/002_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/002_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/003_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/003_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/004_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/004_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/004_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/005_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/005_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/006_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/006_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/006_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/006_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/007_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/007_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/007_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/007_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/008_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/008_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/009_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/009_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/010_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/010_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/010_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/011_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/011_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/012_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/012_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/013_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/013_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/014_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/014_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/015_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/015_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/015_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/016_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/016_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/016_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/017_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/017_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/018_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/018_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/019_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/019_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/019_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/020_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/020_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/021_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/021_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/021_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/022_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/022_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/023_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/023_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/024_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/024_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/025_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/025_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/026_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/026_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/026_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/027_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/027_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/028_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/028_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/029_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/029_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/030_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/030_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/030_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/031_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/031_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/032_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/032_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/033_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/033_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/033_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/034_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/034_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/035_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/035_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/036_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/036_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/036_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/037_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/037_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/038_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/038_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/038_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/039_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/039_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/039_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/039_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/040_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/040_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/041_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/041_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/042_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/042_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/042_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/042_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/043_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/043_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/043_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/044_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/044_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/044_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/045_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/045_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/046_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/046_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/047_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/047_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/048_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/048_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/049_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/049_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/050_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/050_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/051_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/051_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/051_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/051_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/052_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/052_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/053_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/053_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/054_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/054_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/054_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/055_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/055_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/056_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/056_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/057_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/057_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/058_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/058_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/058_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/058_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/059_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/059_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/060_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/060_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/061_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/061_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/062_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/062_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/062_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/063_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/063_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/064_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/064_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/064_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/064_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/065_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/065_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/065_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/066_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/066_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/067_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/067_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/067_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/068_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/068_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/069_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/069_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/069_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/069_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/070_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/070_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/071_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/071_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/071_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/072_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/072_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/072_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/072_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/073_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/073_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/073_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/074_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/074_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/075_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/075_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/076_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/076_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/076_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/076_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/077_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/077_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/077_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/077_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/078_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/078_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/079_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/079_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/080_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/080_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/080_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/080_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/081_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/081_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/082_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/082_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/083_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/083_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/083_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/083_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/084_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/084_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/085_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/085_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/085_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/086_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/086_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/087_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/087_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/088_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/088_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/088_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/088_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/089_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/089_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/089_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/090_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/090_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/091_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/091_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/091_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/091_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/092_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/092_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/092_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/092_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/093_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/093_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/093_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/093_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/094_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/094_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/095_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/095_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/096_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/097_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/097_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/098_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/098_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/099_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/099_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/100_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/100_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/100_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/100_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/101_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/101_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/102_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/102_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/102_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/103_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/103_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/104_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/104_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/104_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/105_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/105_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/106_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/106_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/107_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/107_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/108_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/108_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/109_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/109_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/109_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/110_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/110_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/111_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/111_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/112_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/112_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/112_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/113_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/113_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/113_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/113_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/114_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/114_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/114_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/114_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/115_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/115_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/116_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/116_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/116_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/116_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/117_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/117_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/118_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/118_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/118_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/119_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/119_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/120_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/120_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/120_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/121_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/121_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/122_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/122_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/122_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/122_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/123_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/123_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/123_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/123_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/124_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/124_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/125_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/125_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/125_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/125_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/126_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/126_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/127_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/127_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/128_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/128_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/129_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/129_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/130_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/130_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/130_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/131_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/131_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/131_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/132_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/132_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/133_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/133_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/134_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/134_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/135_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/135_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/135_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/135_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/136_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/136_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/136_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/136_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/137_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/137_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/137_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/138_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/138_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/139_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/139_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/140_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/140_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/141_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/141_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/142_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/142_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/143_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/143_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/143_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/144_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/144_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/145_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/145_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/146_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/146_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/147_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/147_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/148_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/148_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/149_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/149_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/150_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/150_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/151_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/151_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/152_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/152_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/152_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/152_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/153_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/153_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/153_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/153_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/154_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/154_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/155_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/155_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/156_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/156_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/157_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/157_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/157_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/158_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/158_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/159_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/159_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/159_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/160_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/160_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/161_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/161_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/162_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/162_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/163_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/163_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/164_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/164_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/164_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/165_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/165_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/166_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/166_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/167_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/167_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/167_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/167_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/168_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/168_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/168_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/168_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/169_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/169_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/170_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/170_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/170_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/170_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/171_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/171_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/171_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/172_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/172_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/173_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/173_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/173_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/173_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/174_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/174_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/174_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/174_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/175_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/175_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/176_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/176_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/177_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/177_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/177_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/177_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/178_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/178_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/178_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/179_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/179_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/180_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/180_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/181_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/181_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/182_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/182_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/183_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/183_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/183_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/183_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/184_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/184_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/185_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/185_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/186_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/186_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/186_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/187_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/187_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/187_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/187_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/188_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/188_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/189_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/189_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/190_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/190_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/190_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/191_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/191_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/191_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/191_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/192_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/192_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/193_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/193_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/193_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/193_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/194_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/194_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/195_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/195_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/196_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/196_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/197_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/197_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/198_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/198_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/198_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/199_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/199_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/200_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/200_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/200_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/201_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/201_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/202_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/202_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/203_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/203_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/204_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/204_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/205_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/205_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/205_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/206_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/206_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/207_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/207_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/207_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/207_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/208_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/208_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/208_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/209_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/209_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/210_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/210_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/211_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/211_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/212_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/212_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/213_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/213_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/214_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/214_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/214_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/215_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/215_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/216_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/216_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/216_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/217_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/217_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/218_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/218_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/218_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/218_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/219_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/219_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/220_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/220_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/220_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/220_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/221_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/221_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/222_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/222_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/223_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/223_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/224_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/224_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/224_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/224_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/225_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/225_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/225_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/225_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/226_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/226_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/227_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/227_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/228_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/228_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/229_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/229_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/230_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/230_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/230_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/231_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/231_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/232_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/232_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/232_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/232_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/233_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/233_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/234_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/234_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/234_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/234_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/235_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/235_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/235_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/236_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/236_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/237_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/237_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/238_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/238_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/238_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/238_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/239_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/239_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/240_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/240_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/240_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/240_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/241_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/241_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/242_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/242_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/243_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/243_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/244_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/244_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/245_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/245_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/246_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/246_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/247_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/247_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/247_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/248_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/248_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/249_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/249_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/250_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/250_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/250_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/251_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/251_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/251_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/252_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/252_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/253_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/253_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/254_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/254_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/254_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/254_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/255_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/255_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/255_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/256_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/256_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/257_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/257_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/257_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/258_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/258_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/259_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/259_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/260_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/260_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/261_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/261_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/262_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/262_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/262_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/263_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/263_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/263_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/264_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/264_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/264_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/264_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/265_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/265_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/266_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/266_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/267_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/267_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/267_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/wiki/0_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/wiki/0_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/wiki/0_00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/wiki/0_00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/wiki/0_00004.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/wiki/1_00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/wiki/1_00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/wiki/1_00002.npy diff --git a/configs/v1-mix-medium-mitch-ish.yaml b/configs/v1-mix-medium-mitch-ish.yaml new file mode 100644 index 000000000..cbfccd235 --- /dev/null +++ b/configs/v1-mix-medium-mitch-ish.yaml @@ -0,0 +1,170 @@ +run_name: v1-mix-medium-mitch-ish +seed: 6198 +dry_run: false + +wandb: + name: ${run_name} + project: olmo-medium + group: v1-mix + +model: + d_model: 4096 + n_heads: 32 + n_layers: 32 + # mlp_ratio: 6 + mlp_hidden_size: 22016 + weight_tying: false + alibi: false + rope: true + attention_dropout: 0.0 + attention_layer_norm: false + multi_query_attention: false + include_bias: false + block_type: sequential + layer_norm_type: default + layer_norm_with_affine: false + bias_for_layer_norm: false + attention_layer_norm_with_affine: false + activation_type: swiglu + residual_dropout: 0.0 + embedding_dropout: 0.0 + max_sequence_length: 2048 + vocab_size: 50280 + embedding_size: 50304 + eos_token_id: 0 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + +compile: null # causes instability on AMD GPUs + +optimizer: + name: adamw + learning_rate: 3.0e-4 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + metrics_log_interval: 10 + +scheduler: + name: cosine_with_warmup + t_warmup: 2000 + alpha_f: 0.1 + +tokenizer: + identifier: tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json + truncate_direction: right + +save_folder: ${path.choose:${oc.env:FLASH_DIR,no_exist}/checkpoints,/results}/${oc.env:SLURM_JOB_ID,${run_name}} +save_overwrite: false +# Sharded checkpoints (best for restarts) +save_interval: 1000 +save_num_checkpoints_to_keep: -1 +# Unsharded checkpoints (for final storage) +save_interval_unsharded: null # getting errors on LUMI right now +save_num_unsharded_checkpoints_to_keep: -1 + +load_path: null + +max_duration: 476837 # 2T tokens +global_train_batch_size: 2048 +device_train_microbatch_size: 2 + +precision: amp_bf16 +fsdp: + precision: mixed + +max_grad_norm: 1.0 +max_grad_norm_ratio: null + +speed_monitor: + window_size: 20 + +data: + paths: ${path.glob:${oc.env:FLASH_DIR,no_exist}/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/books/*.npy,${oc.env:FLASH_DIR,no_exist}/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/c4/*.npy,${oc.env:FLASH_DIR,no_exist}/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/common-crawl/*/*.npy,${oc.env:FLASH_DIR,no_exist}/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/s2/*.npy,${oc.env:FLASH_DIR,no_exist}/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/stack/*.npy,${oc.env:FLASH_DIR,no_exist}/preprocessed/olmo-mix/v1-sample/gpt-neox-20b-pii-special/wiki/*.npy} + pad_direction: right + num_workers: 0 + drop_last: true + pin_memory: true + prefetch_factor: 16 + persistent_workers: true + timeout: 0 + +eval_interval: ${save_interval} +eval_subset_num_batches: -1 +device_eval_batch_size: ${device_train_microbatch_size} +evaluators: + # lump all the small datasets together (we still get separate metrics). + - label: all-small-ppl-validation + data: + datasets: + 4chan-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/4chan/val.npy + c4_100_domains-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/c4_100_domains/val.npy + c4_en-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/c4_en/val.npy + gab-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/gab/val.npy + ice-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/ice/val.npy + m2d2_s2orc-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/m2d2_s2orc/val.npy + m2d2_wiki-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/m2d2_wiki/val.npy + manosphere-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/manosphere/val.npy + mc4_en-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/mc4_en/val.npy + pile-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/pile/val.npy + ptb-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/ptb/val.npy + twitterAEE-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/twitterAEE/val.npy + wikitext_103-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/wikitext_103/val.npy + drop_last: true + + ########################## + # Downstream evaluations # + ########################## + - label: piqa + type: downstream + + - label: hellaswag + type: downstream + + - label: winogrande + type: downstream + + - label: openbook_qa + type: downstream + + # - label: boolq # requires implemention of the pmi_dc matrix + # type: downstream + + - label: sciq + type: downstream + + - label: arc_easy + type: downstream + + # - label: arc_challenge # requires implemention of the pmi_dc matrix + # type: downstream + + - label: copa + type: downstream + + - label: rte + type: downstream + + - label: commitment_bank + type: downstream + + - label: mrpc + type: downstream + + - label: sst2 + type: downstream diff --git a/configs/v1_5-mix-medium-mitch-ish-mcli.yaml b/configs/v1_5-mix-medium-mitch-ish-mcli.yaml new file mode 100644 index 000000000..4588be0ac --- /dev/null +++ b/configs/v1_5-mix-medium-mitch-ish-mcli.yaml @@ -0,0 +1,622 @@ +run_name: v1_5-mix-medium-mitch-ish +seed: 6198 +dry_run: false + +wandb: + name: ${run_name} + project: olmo-medium + group: v1_5-mix + +model: + d_model: 4096 + n_heads: 32 + n_layers: 32 + # mlp_ratio: 6 + mlp_hidden_size: 22016 + weight_tying: false + alibi: false + rope: true + flash_attention: true + attention_dropout: 0.0 + attention_layer_norm: false + multi_query_attention: false + include_bias: false + block_type: sequential + layer_norm_type: default + layer_norm_with_affine: false + bias_for_layer_norm: false + attention_layer_norm_with_affine: false + activation_type: swiglu + residual_dropout: 0.0 + embedding_dropout: 0.0 + max_sequence_length: 2048 + vocab_size: 50280 + embedding_size: 50304 + eos_token_id: 0 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + +compile: + fullgraph: false + +optimizer: + name: adamw + learning_rate: 3.0e-4 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + metrics_log_interval: 10 + +scheduler: + name: linear_with_warmup + t_warmup: 5000 + alpha_f: 0.1 + +tokenizer: + identifier: tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json + truncate_direction: right + +save_folder: runs/${run_name} +remote_save_folder: s3://ai2-llm/checkpoints/7b/${run_name} +save_overwrite: true +# Sharded checkpoints (best for restarts) +save_interval: 1000 +save_num_checkpoints_to_keep: -1 +# Unsharded checkpoints (for final storage) +save_interval_unsharded: null # getting errors on LUMI right now +save_num_unsharded_checkpoints_to_keep: -1 + +load_path: null + +max_duration: 476837 # 2T tokens +global_train_batch_size: 2048 +device_train_microbatch_size: 2 + +precision: amp_bf16 + +fsdp: + wrapping_strategy: by_block + precision: mixed + +max_grad_norm: 1.0 +max_grad_norm_ratio: null + +speed_monitor: + window_size: 20 + +eval_interval: ${save_interval} +eval_subset_num_batches: -1 +device_eval_batch_size: ${device_train_microbatch_size} +evaluators: + - label: all-small-ppl-validation + data: + num_workers: 0 + drop_last: true + # pin_memory: true + # prefetch_factor: 1 + # persistent_workers: false + # timeout: 0 + datasets: + 4chan-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/4chan/val.npy + c4_100_domains-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_100_domains/val.npy + c4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_en/val.npy + gab-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/gab/val.npy + ice-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ice/val.npy + m2d2_s2orc-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_s2orc/val.npy + m2d2_wiki-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_wiki/val.npy + manosphere-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/manosphere/val.npy + mc4_en-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/mc4_en/val.npy + pile-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/pile/val.npy + ptb-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ptb/val.npy + twitterAEE-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/twitterAEE/val.npy + wikitext_103-validation: + - s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/wikitext_103/val.npy + + ########################## + # Downstream evaluations # + ########################## + - label: piqa + type: downstream + + - label: hellaswag + type: downstream + + - label: winogrande + type: downstream + + - label: openbook_qa + type: downstream + + # - label: boolq # requires implemention of the pmi_dc matrix + # type: downstream + + - label: sciq + type: downstream + + - label: arc_easy + type: downstream + + # - label: arc_challenge # requires implemention of the pmi_dc matrix + # type: downstream + + - label: copa + type: downstream + + - label: rte + type: downstream + + - label: commitment_bank + type: downstream + + - label: mrpc + type: downstream + + - label: sst2 + type: downstream + +data: + pad_direction: right + num_workers: 16 + drop_last: true + pin_memory: true + prefetch_factor: 1 + persistent_workers: true + timeout: 0 + paths: + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-000-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-000-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-001-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-001-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-002-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-002-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-003-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-003-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-004-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-004-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-005-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-005-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-006-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-006-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-006-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-007-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-007-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-008-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-008-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-008-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-009-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-009-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-010-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-010-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-010-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-011-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-011-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-012-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-012-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-013-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-013-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-013-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-014-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-014-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-014-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-015-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-015-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-016-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-016-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-017-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-017-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-018-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-018-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-019-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-019-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-020-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-020-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-021-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-021-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-022-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-022-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-023-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-023-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-024-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-024-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-025-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-025-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-025-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-026-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-026-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-027-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-027-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-027-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-028-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-028-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-028-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-029-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-029-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-030-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-030-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-031-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-031-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-032-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-032-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-033-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-033-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-033-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-034-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-034-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-034-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-035-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-035-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-036-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-036-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-037-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-037-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-038-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-038-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-039-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-039-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-040-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-040-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-041-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-041-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-042-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-042-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-042-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-043-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-043-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-043-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-044-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-044-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-044-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-045-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-045-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-046-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-046-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-046-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-046-00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-047-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-047-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-048-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-048-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-049-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-049-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-050-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-050-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-051-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-051-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-052-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-052-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-052-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-053-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-053-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-053-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-054-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-054-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-055-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-055-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-055-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-056-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-056-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-056-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-057-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-057-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-057-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-058-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-058-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-059-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-059-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-060-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-060-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-061-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-061-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-062-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-062-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-062-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-063-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-063-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-063-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-064-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-064-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-064-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-065-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-065-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-065-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-066-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-066-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-067-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-067-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-068-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-068-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-069-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-069-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-070-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-070-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-071-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-071-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-072-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-072-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-073-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-073-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-074-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-074-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-075-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-075-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-076-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-076-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-077-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-077-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-078-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-078-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-079-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-079-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-080-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-080-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-081-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-081-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-082-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-082-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-083-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-083-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-084-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-084-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-085-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-085-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-086-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-086-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-087-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-087-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-088-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-088-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-089-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-089-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-089-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-090-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-090-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-091-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-091-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-091-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-092-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-092-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-093-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-093-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-093-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-094-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-094-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-094-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-095-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-095-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-096-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-096-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-097-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-097-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-097-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-098-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-098-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-099-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-099-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-100-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-100-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-100-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-101-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-101-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-102-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-102-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-103-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-103-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-104-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-104-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-105-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-105-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-106-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-106-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-106-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-107-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-107-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-108-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-108-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-109-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-109-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-109-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-110-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-110-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-110-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-111-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-111-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-112-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-112-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-113-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-113-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-114-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-114-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-114-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-115-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-115-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-116-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-116-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-117-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-117-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-118-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-118-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-119-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-119-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-120-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-120-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-120-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-121-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-121-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-122-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-122-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-122-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-123-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-123-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-123-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-124-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-124-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-125-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-125-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-126-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-126-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-127-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-127-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-127-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-128-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-128-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-129-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-129-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-129-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-130-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-130-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-131-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-131-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-132-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-132-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-133-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-133-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-133-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-134-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-134-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-134-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-135-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-135-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-135-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-136-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-136-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-137-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-137-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-137-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-138-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-138-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-139-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-139-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-140-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-140-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-141-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-141-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-141-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-142-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-142-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-142-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-143-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-143-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-144-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-144-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-144-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-145-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-145-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-145-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-146-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-146-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-146-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-147-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-147-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-147-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-148-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-148-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-149-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-149-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-149-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-150-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-150-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-150-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-150-00003.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-151-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-151-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-152-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-152-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-153-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-153-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-154-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-154-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-155-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-155-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-155-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-156-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-156-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-157-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-157-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-157-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-158-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-158-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-159-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-159-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-160-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-160-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-161-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-161-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-161-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-162-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-162-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-163-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-163-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-164-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-164-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-165-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-165-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-165-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-166-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-166-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-166-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-167-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-167-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-167-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-168-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-168-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-169-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-169-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-170-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-170-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-171-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-171-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-172-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-172-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-173-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-173-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-173-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-174-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-174-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-174-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-175-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-175-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-175-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-176-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-176-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-176-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-177-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-177-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-178-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-178-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-179-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-179-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-180-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-180-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-181-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-181-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-182-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-182-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-182-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-183-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-183-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-183-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-184-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-184-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-185-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-185-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-185-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-186-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-186-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-186-00002.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-187-00000.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-187-00001.npy + - s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-187-00002.npy diff --git a/configs/v1_5-mix-medium-mitch-ish.yaml b/configs/v1_5-mix-medium-mitch-ish.yaml new file mode 100644 index 000000000..a8f359c50 --- /dev/null +++ b/configs/v1_5-mix-medium-mitch-ish.yaml @@ -0,0 +1,177 @@ +run_name: v1_5-mix-medium-mitch-ish +seed: 6198 +dry_run: false + +wandb: + name: ${run_name} + project: olmo-medium + group: v1_5-mix + +model: + d_model: 4096 + n_heads: 32 + n_layers: 32 + # mlp_ratio: 6 + mlp_hidden_size: 22016 + weight_tying: false + alibi: false + rope: true + flash_attention: false # not available on AMD + attention_dropout: 0.0 + attention_layer_norm: false + multi_query_attention: false + include_bias: false + block_type: sequential + layer_norm_type: default + layer_norm_with_affine: false + bias_for_layer_norm: false + attention_layer_norm_with_affine: false + activation_type: swiglu + residual_dropout: 0.0 + embedding_dropout: 0.0 + max_sequence_length: 2048 + vocab_size: 50280 + embedding_size: 50304 + eos_token_id: 0 + pad_token_id: 1 + init_device: meta + init_fn: mitchell + +compile: null + +optimizer: + name: adamw + learning_rate: 3.0e-4 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + metrics_log_interval: 10 + +scheduler: + name: cosine_with_warmup + t_warmup: 5000 + alpha_f: 0.1 + +tokenizer: + identifier: tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json + truncate_direction: right + +save_folder: ${path.choose:${oc.env:FLASH_DIR,no_exist}/checkpoints,/results}/${oc.env:SLURM_JOB_ID,${run_name}} +save_overwrite: false +# Sharded checkpoints (best for restarts) +save_interval: 1000 +save_num_checkpoints_to_keep: -1 +# Unsharded checkpoints (for final storage) +save_interval_unsharded: null # getting errors on LUMI right now +save_num_unsharded_checkpoints_to_keep: -1 + +load_path: null + +max_duration: 476837 # 2T tokens +global_train_batch_size: 2048 +device_train_microbatch_size: 2 + +precision: amp_bf16 + +fsdp: + wrapping_strategy: null + precision: mixed + +max_grad_norm: 1.0 +max_grad_norm_ratio: null + +speed_monitor: + window_size: 20 + +eval_interval: ${save_interval} +eval_subset_num_batches: -1 +device_eval_batch_size: ${device_train_microbatch_size} +evaluators: + - label: all-small-ppl-validation + data: + num_workers: 0 + drop_last: true + # pin_memory: true + # prefetch_factor: 1 + # persistent_workers: false + # timeout: 0 + datasets: + 4chan-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/4chan/val.npy + c4_100_domains-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/c4_100_domains/val.npy + c4_en-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/c4_en/val.npy + gab-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/gab/val.npy + ice-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/ice/val.npy + m2d2_s2orc-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/m2d2_s2orc/val.npy + m2d2_wiki-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/m2d2_wiki/val.npy + manosphere-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/manosphere/val.npy + mc4_en-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/mc4_en/val.npy + pile-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/pile/val.npy + ptb-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/ptb/val.npy + twitterAEE-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/twitterAEE/val.npy + wikitext_103-validation: + - ${path.choose:${oc.env:SCRATCH_DIR,no_exist},/net/nfs.cirrascale/allennlp/llm-data}/eval-data/perplexity/v2_small_gptneox20b/wikitext_103/val.npy + + ########################## + # Downstream evaluations # + ########################## + - label: piqa + type: downstream + + - label: hellaswag + type: downstream + + - label: winogrande + type: downstream + + - label: openbook_qa + type: downstream + + # - label: boolq # requires implemention of the pmi_dc matrix + # type: downstream + + - label: sciq + type: downstream + + - label: arc_easy + type: downstream + + # - label: arc_challenge # requires implemention of the pmi_dc matrix + # type: downstream + + - label: copa + type: downstream + + - label: rte + type: downstream + + - label: commitment_bank + type: downstream + + - label: mrpc + type: downstream + + - label: sst2 + type: downstream + +data: + paths: ${path.glob:${oc.env:FLASH_DIR,no_exist}/preprocessed/olmo-mix/v1_5/gpt-neox-20b-pii-special/*.npy} + pad_direction: right + num_workers: 0 + drop_last: true + pin_memory: true + prefetch_factor: 16 + persistent_workers: true + timeout: 0 diff --git a/olmo/config.py b/olmo/config.py index 1a861f92a..b327ac90c 100644 --- a/olmo/config.py +++ b/olmo/config.py @@ -238,6 +238,12 @@ class ModelConfig(BaseConfig): mlp_ratio: int = 4 """ The ratio of the inner MLP dimensionality to ``d_model``. + This is only used when ``mlp_hidden_size`` is not set. + """ + + mlp_hidden_size: Optional[int] = None + """ + Set the exact hidden size for the MLP. Otherwise the inner MLP hidden size will be set to `mlp_ratio * d_model`. """ activation_type: ActivationType = ActivationType.swiglu @@ -353,6 +359,11 @@ class ModelConfig(BaseConfig): substantially. """ + weight_tying: bool = True + """ + Whether to tie output linear weights to the input embedding. + """ + eos_token_id: int = 50256 """ The ID of the end-of-sentence special token. @@ -680,6 +691,11 @@ class TrainConfig(BaseConfig): checkpoint into an unsharded checkpoint. """ + no_pre_train_checkpoint: bool = False + """ + Skip saving pre-train checkpoint. + """ + load_path: Optional[str] = None """ The path to a training checkpoint to restore/resume from. diff --git a/olmo/model.py b/olmo/model.py index 266dc4a7e..ec4c597f8 100644 --- a/olmo/model.py +++ b/olmo/model.py @@ -9,6 +9,7 @@ import logging import math from abc import abstractmethod +from collections.abc import MutableMapping from typing import Dict, List, NamedTuple, Optional, Sequence, Tuple, cast import torch @@ -51,6 +52,24 @@ log = logging.getLogger(__name__) +class BufferCache(dict, MutableMapping[str, torch.Tensor]): + """ + Cache for attention biases and other things that would normally be stored as buffers. + We avoid using buffers because we've run into various issues doing so with FSDP. + In general it appears the way FSDP handles buffers is not well-defined. + It doesn't shard them but apparently it does synchronize them across processes, which we want to avoid + since (A) it isn't necessary, and (B) we sometimes have `-inf` in these biases which might get turned into + NaNs when they're synchronized due to casting or some other issue. + """ + + +def _non_meta_init_device(config: ModelConfig) -> torch.device: + if config.init_device is not None and config.init_device != "meta": + return torch.device(config.init_device) + else: + return torch.device("cuda" if torch.cuda.is_available() else "cpu") + + class Dropout(nn.Dropout): def forward(self, input: torch.Tensor) -> torch.Tensor: if self.p == 0.0: @@ -245,28 +264,61 @@ class RotaryEmbedding(nn.Module): [Rotary positional embeddings (RoPE)](https://arxiv.org/abs/2104.09864). """ - def __init__(self, config: ModelConfig): + def __init__(self, config: ModelConfig, cache: BufferCache): super().__init__() - dim = config.d_model // config.n_heads - inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2, device=config.init_device).float() / dim)) - self.register_buffer("inv_freq", inv_freq) - - def forward(self, max_seq_len, *, device): - seq = torch.arange(max_seq_len, device=device, dtype=self.inv_freq.dtype) # type: ignore - freqs = einsum("i , j -> i j", seq, self.inv_freq) - return torch.cat((freqs, freqs), dim=-1) - - -def rotate_half(x: torch.Tensor) -> torch.Tensor: - B, nh, T, hs = x.size() - x = x.view(B, nh, T, 2, hs // 2) - x1, x2 = x.unbind(dim=-2) - return torch.cat((-x2, x1), dim=-1) - + self.config = config + self.__cache = cache + # Warm up cache. + self.get_rotary_embedding(config.max_sequence_length, _non_meta_init_device(config)) -def apply_rotary_pos_emb(pos: torch.Tensor, t: torch.Tensor) -> torch.Tensor: - out = (t * pos.cos()) + (rotate_half(t) * pos.sin()) - return out.to(t.dtype) + def get_rotary_embedding(self, seq_len: int, device: torch.device) -> Tuple[torch.Tensor, torch.Tensor]: + if ( + (pos_sin := self.__cache.get("rope_pos_sin")) is not None + and (pos_cos := self.__cache.get("rope_pos_cos")) is not None + and pos_sin.shape[-2] >= seq_len + and pos_cos.shape[-2] >= seq_len + ): + if pos_sin.device != device: + pos_sin = pos_sin.to(device) + self.__cache["rope_pos_sin"] = pos_sin + if pos_cos.device != device: + pos_cos = pos_cos.to(device) + self.__cache["rope_pos_cos"] = pos_cos + return pos_sin[:, :, :seq_len, :], pos_cos[:, :, :seq_len, :] + + with torch.autocast(device.type, enabled=False): + dim = self.config.d_model // self.config.n_heads + inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2, device=device, dtype=torch.float) / dim)) + seq = torch.arange(seq_len, device=device, dtype=torch.float) + freqs = einsum("i , j -> i j", seq, inv_freq) + positions = torch.cat((freqs, freqs), dim=-1) + pos_sin, pos_cos = positions.sin()[None, None, :, :], positions.cos()[None, None, :, :] + self.__cache["rope_pos_sin"] = pos_sin + self.__cache["rope_pos_cos"] = pos_cos + return pos_sin, pos_cos + + def rotate_half(self, x: torch.Tensor) -> torch.Tensor: + B, nh, T, hs = x.size() + x = x.view(B, nh, T, 2, hs // 2) + x1, x2 = x.unbind(dim=-2) + return torch.cat((-x2, x1), dim=-1) + + def apply_rotary_pos_emb(self, pos_sin: torch.Tensor, pos_cos: torch.Tensor, t: torch.Tensor) -> torch.Tensor: + out = (t * pos_cos) + (self.rotate_half(t) * pos_sin) + return out.to(t.dtype) + + def forward(self, q: torch.Tensor, k: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + q_, k_ = q.float(), k.float() + with torch.autocast(q.device.type, enabled=False): + query_len, key_len = q_.shape[-2], k_.shape[-2] # could be different if layer_past not None + pos_sin, pos_cos = self.get_rotary_embedding(key_len, q_.device) + q_ = self.apply_rotary_pos_emb( + pos_sin[:, :, key_len - query_len : key_len, :], + pos_cos[:, :, key_len - query_len : key_len, :], + q_, + ) + k_ = self.apply_rotary_pos_emb(pos_sin, pos_cos, k_) + return q_.type_as(q), k_.type_as(k) class Activation(nn.Module): @@ -322,10 +374,14 @@ class OlmoBlock(nn.Module): A base class for transformer block implementations. """ - def __init__(self, layer_id: int, config: ModelConfig): + def __init__(self, layer_id: int, config: ModelConfig, cache: BufferCache): super().__init__() self.layer_id = layer_id self.config = config + self.hidden_size = ( + config.mlp_hidden_size if config.mlp_hidden_size is not None else config.mlp_ratio * config.d_model + ) + self.__cache = cache assert config.d_model % config.n_heads == 0 # Dropout. @@ -344,7 +400,7 @@ def __init__(self, layer_id: int, config: ModelConfig): # Activation function. self.act = Activation.build(config) - assert (self.act.output_multiplier * config.mlp_ratio * config.d_model) % 1 == 0 + assert (self.act.output_multiplier * self.hidden_size) % 1 == 0 # Attention output projection. self.attn_out = nn.Linear( @@ -353,7 +409,7 @@ def __init__(self, layer_id: int, config: ModelConfig): # Feed-forward output projection. self.ff_out = nn.Linear( - int(self.act.output_multiplier * config.mlp_ratio * config.d_model), + int(self.act.output_multiplier * self.hidden_size), config.d_model, bias=config.include_bias, device=config.init_device, @@ -362,10 +418,7 @@ def __init__(self, layer_id: int, config: ModelConfig): # Rotary embeddings. if self.config.rope: - self.rotary_emb = RotaryEmbedding(config) - self.register_buffer( - "pos_emb", self.rotary_emb(config.max_sequence_length, device=config.init_device), persistent=False - ) + self.rotary_emb = RotaryEmbedding(config, self.__cache) def reset_parameters(self): if self.k_norm is not None: @@ -385,14 +438,6 @@ def reset_parameters(self): layer_id=self.layer_id, ) - def get_rotary_embedding(self, seq_len: int, device: Optional[torch.device]) -> torch.Tensor: - if self.pos_emb is not None and self.pos_emb.shape[-2] >= seq_len: # type: ignore - return self.pos_emb[:seq_len] # type: ignore - - pos_emb = self.rotary_emb(seq_len, device=device) - self.register_buffer("pos_emb", pos_emb, persistent=False) - return pos_emb - def attention( self, q: torch.Tensor, @@ -438,9 +483,7 @@ def attention( if self.config.rope: # Apply rotary embeddings. - positions = self.get_rotary_embedding(key_len, q.device) - q = apply_rotary_pos_emb(positions[key_len - query_len : key_len], q) - k = apply_rotary_pos_emb(positions, k) + q, k = self.rotary_emb(q, k) if attention_bias is not None: attention_bias = attention_bias[:, :, key_len - query_len : key_len, :key_len] @@ -471,11 +514,11 @@ def forward( raise NotImplementedError @classmethod - def build(cls, layer_id: int, config: ModelConfig) -> OlmoBlock: + def build(cls, layer_id: int, config: ModelConfig, cache: BufferCache) -> OlmoBlock: if config.block_type == BlockType.sequential: - return OlmoSequentialBlock(layer_id, config) + return OlmoSequentialBlock(layer_id, config, cache) elif config.block_type == BlockType.parallel: - return OlmoParallelBlock(layer_id, config) + return OlmoParallelBlock(layer_id, config, cache) else: raise NotImplementedError(f"not sure how to handle block type '{config.block_type}'") @@ -486,8 +529,8 @@ class OlmoSequentialBlock(OlmoBlock): (plus another skip connection). """ - def __init__(self, layer_id: int, config: ModelConfig): - super().__init__(layer_id, config) + def __init__(self, layer_id: int, config: ModelConfig, cache: BufferCache): + super().__init__(layer_id, config, cache) # Layer norms. self.attn_norm = LayerNorm.build(config) self.ff_norm = LayerNorm.build(config) @@ -501,7 +544,7 @@ def __init__(self, layer_id: int, config: ModelConfig): ) # Feed-forward input projection. self.ff_proj = nn.Linear( - config.d_model, config.mlp_ratio * config.d_model, bias=config.include_bias, device=config.init_device + config.d_model, self.hidden_size, bias=config.include_bias, device=config.init_device ) def reset_parameters(self): @@ -551,8 +594,8 @@ class OlmoParallelBlock(OlmoBlock): to fuse the output projections, but we found that didn't help. """ - def __init__(self, layer_id: int, config: ModelConfig): - super().__init__(layer_id, config) + def __init__(self, layer_id: int, config: ModelConfig, cache: BufferCache): + super().__init__(layer_id, config, cache) self.norm = LayerNorm.build(config) # Fused attention and feed-forward projection. # NOTE: we could also fuse the attention and feed-forward output projections @@ -564,10 +607,10 @@ def __init__(self, layer_id: int, config: ModelConfig): config.d_model, config.d_model // config.n_heads, config.d_model // config.n_heads, - config.mlp_ratio * config.d_model, + self.hidden_size, ) else: - self.fused_dims = (config.d_model, config.d_model, config.d_model, config.mlp_ratio * config.d_model) + self.fused_dims = (config.d_model, config.d_model, config.d_model, self.hidden_size) self.fused_attn_ff_proj = nn.Linear( config.d_model, sum(self.fused_dims), bias=config.include_bias, device=config.init_device ) @@ -590,7 +633,7 @@ def forward( # - for regular attn q, k, v: (batch_size, seq_len, d_model) # - for multi-query attn q: (batch_size, seq_len, d_model) # k, v: (batch_size, seq_len, d_model // n_heads) - # shape of ff: (batch_size, seq_len, mlp_ratio x d_model) + # shape of ff: (batch_size, seq_len, hidden_size) q, k, v, ff = self.fused_attn_ff_proj(self.norm(x)).split(self.fused_dims, dim=-1) # Get attention scores. @@ -629,22 +672,20 @@ class OlmoGenerateOutput(NamedTuple): """ -def causal_attention_bias(config: ModelConfig, device: torch.device) -> torch.FloatTensor: - size = config.max_sequence_length +def causal_attention_bias(seq_len: int, device: torch.device) -> torch.FloatTensor: att_bias = torch.triu( - torch.ones(size, size, device=device, dtype=torch.float), + torch.ones(seq_len, seq_len, device=device, dtype=torch.float), diagonal=1, ) att_bias.masked_fill_(att_bias == 1, float("-inf")) - return att_bias.view(1, 1, size, size) # type: ignore + return att_bias.view(1, 1, seq_len, seq_len) # type: ignore -def alibi_attention_bias(config: ModelConfig, device: torch.device) -> torch.FloatTensor: - size = config.max_sequence_length - alibi_bias = torch.arange(1 - size, 1, dtype=torch.float, device=device).view(1, 1, 1, size) +def alibi_attention_bias(seq_len: int, config: ModelConfig, device: torch.device) -> torch.FloatTensor: + alibi_bias = torch.arange(1 - seq_len, 1, dtype=torch.float, device=device).view(1, 1, 1, seq_len) # shape: (1, 1, seq_len, seq_len) - alibi_bias = alibi_bias - torch.arange(1 - size, 1, dtype=torch.float, device=device).view(1, 1, size, 1) + alibi_bias = alibi_bias - torch.arange(1 - seq_len, 1, dtype=torch.float, device=device).view(1, 1, seq_len, 1) alibi_bias.abs_().mul_(-1) # shape: (n_heads,) @@ -659,6 +700,7 @@ class Olmo(nn.Module): def __init__(self, config: ModelConfig, init_params: bool = True): super().__init__() self.config = config + self.__cache = BufferCache() # Validate config. if self.config.alibi and self.config.flash_attention: @@ -686,7 +728,7 @@ def __init__(self, config: ModelConfig, init_params: bool = True): config.embedding_size or config.vocab_size, config.d_model, device=config.init_device ), emb_drop=Dropout(config.embedding_dropout), - blocks=nn.ModuleList([OlmoBlock.build(i, config) for i in range(config.n_layers)]), + blocks=nn.ModuleList([OlmoBlock.build(i, config, self.__cache) for i in range(config.n_layers)]), ln_f=LayerNorm.build(config), ) ) @@ -694,25 +736,34 @@ def __init__(self, config: ModelConfig, init_params: bool = True): self.transformer.update( {"wpe": nn.Embedding(config.max_sequence_length, config.d_model, device=config.init_device)} ) + if not config.weight_tying: + self.transformer.update( + { + "ff_out": nn.Linear( + config.d_model, + config.embedding_size or config.vocab_size, + bias=config.include_bias, + device=config.init_device, + ) + } + ) # When `init_device="meta"` FSDP will call `reset_parameters()` to initialize weights. if init_params and self.config.init_device != "meta": self.reset_parameters() self.__num_fwd_flops: Optional[int] = None - # Attention bias cache. - # We could cache these as buffers, but we've run into various issues doing that with FSDP. - # In general it appears the way FSDP handles buffers is not well-defined. - # It doesn't shard them but apparently it does synchronize them across processes, which we want to avoid - # since (A) it isn't necessary, and (B) we have `-inf` in these biases which might get turned into - # NaNs when they're synchronized due to casting or some other issue. - self.__bias_cache: Dict[str, Optional[torch.FloatTensor]] = { - "causal_attention_bias": None, - "alibi_attention_bias": None, - } + # Warm up cache. if self.config.alibi: - # Warm up cache. - self.causal_attention_bias - self.alibi_attention_bias + self.get_causal_attention_bias(config.max_sequence_length, _non_meta_init_device(config)) + self.get_alibi_attention_bias(config.max_sequence_length, _non_meta_init_device(config)) + + @property + def device(self) -> torch.device: + device: torch.device = self.transformer.wte.weight.device # type: ignore + if device.type == "meta": + return _non_meta_init_device(self.config) + else: + return device def reset_parameters(self): log.info("Initializing model parameters...") @@ -728,42 +779,39 @@ def reset_parameters(self): # Top-level layer norm. self.transformer.ln_f.reset_parameters() # type: ignore + # Output weights. + if hasattr(self.transformer, "ff_out"): + init_weights(self.config, self.transformer.ff_out) # type: ignore + # Let the blocks handle themselves. for block in self.transformer.blocks: # type: ignore block.reset_parameters() # type: ignore - @property - def device(self) -> torch.device: - device: torch.device = self.transformer.wte.weight.device # type: ignore - if device.type == "meta": - if self.config.init_device is not None and self.config.init_device != "meta": - return torch.device(self.config.init_device) - else: - return torch.device("cuda" if torch.cuda.is_available() else "cpu") - else: - return device - - @property - def causal_attention_bias(self) -> torch.FloatTensor: - causal_bias = self.__bias_cache["causal_attention_bias"] - if causal_bias is None: - causal_bias = causal_attention_bias(self.config, self.device) - self.__bias_cache["causal_attention_bias"] = causal_bias - elif causal_bias.device != self.device: # in case model was moved to different device - causal_bias = causal_bias.to(device=self.device) - self.__bias_cache["causal_attention_bias"] = causal_bias # type: ignore - return causal_bias # type: ignore - - @property - def alibi_attention_bias(self) -> torch.FloatTensor: - alibi_bias = self.__bias_cache["alibi_attention_bias"] - if alibi_bias is None: - alibi_bias = alibi_attention_bias(self.config, self.device) - self.__bias_cache["alibi_attention_bias"] = alibi_bias - elif alibi_bias.device != self.device: # in case model was moved to different device - alibi_bias = alibi_bias.to(device=self.device) - self.__bias_cache["alibi_attention_bias"] = alibi_bias # type: ignore - return alibi_bias # type: ignore + def get_causal_attention_bias(self, seq_len: int, device: torch.device) -> torch.Tensor: + if (causal_bias := self.__cache.get("causal_attention_bias")) is not None and causal_bias.shape[ + -1 + ] >= seq_len: + if causal_bias.device != device: + causal_bias = causal_bias.to(device) + self.__cache["causal_attention_bias"] = causal_bias + return causal_bias + with torch.autocast(device.type, enabled=False): + causal_bias = causal_attention_bias(seq_len, device) + self.__cache["causal_attention_bias"] = causal_bias + return causal_bias + + def get_alibi_attention_bias(self, seq_len: int, device: torch.device) -> torch.Tensor: + if (alibi_bias := self.__cache.get("alibi_attention_bias")) is not None and alibi_bias.shape[ + -1 + ] >= seq_len: + if alibi_bias.device != device: + alibi_bias = alibi_bias.to(device) + self.__cache["alibi_attention_bias"] = alibi_bias + return alibi_bias + with torch.autocast(device.type, enabled=False): + alibi_bias = alibi_attention_bias(seq_len, self.config, device) + self.__cache["alibi_attention_bias"] = alibi_bias + return alibi_bias def forward( self, @@ -806,10 +854,10 @@ def forward( assert len(past_key_values) == self.config.n_layers batch_size, seq_len = input_ids.size() - assert seq_len <= self.config.max_sequence_length, ( - f"Cannot forward input with seq_len={seq_len}, " - f"this model only supports seq_len<={self.config.max_sequence_length}" - ) + if past_key_values is None: + past_length = 0 + else: + past_length = past_key_values[0][0].size(-2) # Get embeddings of input. # shape: (batch_size, seq_len, d_model) @@ -817,10 +865,6 @@ def forward( if not (self.config.alibi or self.config.rope): # Get positional embeddings. - if past_key_values is None: - past_length = 0 - else: - past_length = past_key_values[0][0].size(-2) # shape: (1, seq_len) pos = torch.arange( past_length, past_length + seq_len, dtype=torch.long, device=input_ids.device @@ -838,7 +882,9 @@ def forward( # shape: (batch_size, 1, 1, seq_len) attention_mask = attention_mask.to(dtype=x.dtype).view(batch_size, -1)[:, None, None, :] attention_mask = (1.0 - attention_mask) * torch.finfo(attention_mask.dtype).min - attention_mask.masked_fill_(attention_mask == 1.0, float("-inf")) + # TODO: fill w/ -inf instead? + # attention_mask = 1.0 - attention_mask + # attention_mask.masked_fill_(attention_mask == 1.0, float("-inf")) # Merge attention mask with attention bias. if ( @@ -851,12 +897,14 @@ def forward( or past_key_values is not None ): if attention_bias is None and self.config.alibi: - attention_bias = self.causal_attention_bias + self.alibi_attention_bias + attention_bias = self.get_causal_attention_bias( + past_length + seq_len, x.device + ) + self.get_alibi_attention_bias(past_length + seq_len, x.device) elif attention_bias is None: - attention_bias = self.causal_attention_bias + attention_bias = self.get_causal_attention_bias(past_length + seq_len, x.device) elif attention_bias.dtype in (torch.int8, torch.bool): attention_bias = attention_bias.to(dtype=x.dtype) - attention_bias.masked_fill_(attention_bias == 0.0, float("-inf")) + attention_bias.masked_fill_(attention_bias == 0.0, torch.finfo(attention_bias.dtype).min) # Transform to the right shape and data type. mask_len = seq_len @@ -893,7 +941,10 @@ def forward( # Get logits. # shape: (batch_size, seq_len or 1, vocab_size) - logits = F.linear(x, self.transformer.wte.weight, None) # type: ignore + if self.config.weight_tying: + logits = F.linear(x, self.transformer.wte.weight, None) # type: ignore + else: + logits = self.transformer.ff_out(x) # type: ignore if self.config.scale_logits: logits.mul_(1 / math.sqrt(self.config.d_model)) diff --git a/olmo/train.py b/olmo/train.py index b55c7c284..b491e4ecf 100644 --- a/olmo/train.py +++ b/olmo/train.py @@ -672,7 +672,9 @@ def restore_checkpoint( ) elif checkpoint_type == CheckpointType.sharded or checkpoint_type is None: try: - legacy_mode = resource_path(load_path, f"rank{get_global_rank()}.pt").is_file() + legacy_mode = resource_path( + load_path, f"rank{get_global_rank()}.pt", local_cache=local_cache + ).is_file() except FileNotFoundError: legacy_mode = False if legacy_mode: diff --git a/olmo/util.py b/olmo/util.py index 4c61a658f..5542ae1f5 100644 --- a/olmo/util.py +++ b/olmo/util.py @@ -492,7 +492,11 @@ def _gcs_get_bytes_range(bucket_name: str, key: str, bytes_start: int, num_bytes return blob.download_as_bytes(start=bytes_start, end=bytes_start + num_bytes - 1) -s3_client = boto3.client("s3", config=Config(retries={"max_attempts": 10, "mode": "standard"})) +s3_client = boto3.client( + "s3", + config=Config(retries={"max_attempts": 10, "mode": "standard"}), + use_ssl=not int(os.environ.get("OLMO_NO_SSL", "0")), +) def _wait_before_retry(attempt: int): diff --git a/scripts/train.py b/scripts/train.py index b48e7fa00..7066bc098 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -25,6 +25,7 @@ from olmo.util import ( barrier, clean_opt, + get_default_device, get_global_rank, get_local_rank, get_world_size, @@ -124,7 +125,7 @@ def main(cfg: TrainConfig) -> None: if version.parse(torch.__version__) >= version.parse("2.1.0"): # This prevents any parameters from being initialized twice def dummy_init_fn(module: torch.nn.Module) -> None: - module.to_empty(device=get_local_rank()) + module.to_empty(device=get_default_device()) param_init_fn = dummy_init_fn else: @@ -193,7 +194,7 @@ def dummy_init_fn(module: torch.nn.Module) -> None: evaluators=evaluators, indices_file=indices_file, ) as trainer: - if not cfg.dry_run and cfg.load_path is None: + if not cfg.dry_run and not cfg.no_pre_train_checkpoint and cfg.load_path is None: checkpoint_type = ( CheckpointType.sharded if cfg.save_num_checkpoints_to_keep != 0 else CheckpointType.unsharded ) diff --git a/tests/model_test.py b/tests/model_test.py index e0d46ea31..0d1bfd6de 100644 --- a/tests/model_test.py +++ b/tests/model_test.py @@ -158,6 +158,7 @@ def test_forward( ): torch.manual_seed(0) torch.use_deterministic_algorithms(True) + device = torch.device("cuda" if cuda else "cpu") train_config.model.alibi = alibi train_config.model.rope = rope @@ -184,7 +185,7 @@ def test_forward( ] ) batch_inputs = { # type: ignore - k: v.to(device=model.device) if isinstance(v, torch.Tensor) else v for k, v in batch_inputs.items() + k: v.to(device=device) if isinstance(v, torch.Tensor) else v for k, v in batch_inputs.items() } # Run forward pass. @@ -192,14 +193,14 @@ def test_forward( with torch.autocast( device_type="cuda" if cuda else "cpu", enabled=use_amp, dtype=None if not use_amp else dtype ): - output1 = model(torch.tensor(input1, device=model.device).unsqueeze(0)) + output1 = model(torch.tensor(input1, device=device).unsqueeze(0)) key_value_cache1 = model( - torch.tensor(input1[:-1], device=model.device).unsqueeze(0), use_cache=True + torch.tensor(input1[:-1], device=device).unsqueeze(0), use_cache=True ).attn_key_values output1_from_cached = model( - torch.tensor(input1[-1:], device=model.device).unsqueeze(0), past_key_values=key_value_cache1 + torch.tensor(input1[-1:], device=device).unsqueeze(0), past_key_values=key_value_cache1 ) - output2 = model(torch.tensor(input2, device=model.device).unsqueeze(0)) + output2 = model(torch.tensor(input2, device=device).unsqueeze(0)) batch_output = model(**batch_inputs) batch_key_value_cache = model( batch_inputs["input_ids"][:, :-1], @@ -279,6 +280,7 @@ def test_backward( train_config: TrainConfig, tokenizer: Tokenizer, alibi: bool, flash_attn: bool, cuda: bool, dtype ): torch.manual_seed(0) + device = torch.device("cuda" if cuda else "cpu") use_amp = dtype in {torch.float16, torch.bfloat16} scaler = None if not (cuda and use_amp) else torch.cuda.amp.GradScaler() @@ -298,7 +300,7 @@ def test_backward( device_type="cuda" if cuda else "cpu", enabled=use_amp, dtype=None if not use_amp else dtype ): # Forward pass to get logits. - input_ids = torch.tensor(tokenizer.encode("My name is OLMo!"), device=model.device).unsqueeze(0) + input_ids = torch.tensor(tokenizer.encode("My name is OLMo!"), device=device).unsqueeze(0) logits = model(input_ids).logits # Compute loss. @@ -316,7 +318,7 @@ def test_backward( for name, parameter in model.named_parameters(): if parameter.requires_grad: assert parameter.grad is not None - zeros = torch.zeros(parameter.size(), device=model.device) + zeros = torch.zeros(parameter.size(), device=device) if (parameter.grad == zeros).all(): raise RuntimeError(f"{name} has zero a gradient!") else: @@ -349,6 +351,7 @@ def test_generate( ): torch.manual_seed(0) torch.use_deterministic_algorithms(True) + device = torch.device("cuda" if cuda else "cpu") # Should always pad left when generating. train_config.data.pad_direction = PaddingDirection.left @@ -373,7 +376,7 @@ def test_generate( ] ) batch_inputs = { # type: ignore - k: v.to(device=model.device) if isinstance(v, torch.Tensor) else v for k, v in batch_inputs.items() + k: v.to(device=device) if isinstance(v, torch.Tensor) else v for k, v in batch_inputs.items() } beam_search_kwargs = dict(beam_size=3, max_steps=5) @@ -382,7 +385,7 @@ def test_generate( device_type="cuda" if cuda else "cpu", enabled=use_amp, dtype=None if not use_amp else dtype ): output1 = model.generate( - torch.tensor(input1, device=model.device).unsqueeze(0), # type: ignore + torch.tensor(input1, device=device).unsqueeze(0), # type: ignore **beam_search_kwargs, ) batch_output = model.generate(**{**batch_inputs, **beam_search_kwargs})