From add7b2b951dbc45dabf41e0c09ab23285ee652f7 Mon Sep 17 00:00:00 2001 From: kshitij Date: Thu, 4 Apr 2024 16:55:46 +0200 Subject: [PATCH] changed res --- .../create_unified_interleaved_dataset.py | 8 ++++---- megatron/model/multimodal_encoder.py | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/megatron/data/streaming_dataset/interleaved_text_image/create_unified_interleaved_dataset.py b/megatron/data/streaming_dataset/interleaved_text_image/create_unified_interleaved_dataset.py index 82f8cda14..157269ca7 100644 --- a/megatron/data/streaming_dataset/interleaved_text_image/create_unified_interleaved_dataset.py +++ b/megatron/data/streaming_dataset/interleaved_text_image/create_unified_interleaved_dataset.py @@ -35,7 +35,7 @@ from megatron.tokenizer.tokenizer import build_tokenizer -IMAGE_SIZE = 336 +IMAGE_SIZE = 224 IMAGE_UNDERSTANDING_TEXT_VARIANTS = [ ("Describe this image", " "), @@ -1248,7 +1248,7 @@ def parse_args() -> Namespace: parser.add_argument("--workers", type=int, default=22) # 44 # 80 parser.add_argument("--num_writers", type=int, default=26) # 2 parser.add_argument("--start_ind", type=int, default=0) - parser.add_argument("--end_ind", type=int, default=62) # 150 + parser.add_argument("--end_ind", type=int, default=5000) # 150 parser.add_argument("--tokenizer_type", type=str, required=False, default=None) parser.add_argument("--vocab_file", type=str, required=False, default=None) parser.add_argument("--merge_file", type=str, required=False, default=None) @@ -1288,10 +1288,10 @@ def parse_args() -> Namespace: python megatron/data/streaming_dataset/interleaved_text_image/create_unified_interleaved_dataset.py --path /p/fastdata/mmlaion/hummingbird/SlimPajama-627B/train/chunk2 --dataset_type text --compression zstd --concat_tokens 2048 --tokenizer_type HFTokenizer --vocab_file /p/project/ccstdl/gupta6/multimodal/20B_tokenizer.json --out_root /p/fastdata/mmlaion/hummingbird/hummingbird_dataset_final/text_val_chunk2 5e8, 22, 26 -python megatron/data/streaming_dataset/interleaved_text_image/create_unified_interleaved_dataset.py --path /p/fastdata/mmlaion/datacomp/datacomp_1B/flat --dataset_type datacomp --datacomp_mode understanding --compression zstd --concat_tokens 2048 --tokenizer_type HFTokenizer --vocab_file /p/project/ccstdl/gupta6/multimodal/20B_tokenizer.json --out_root /p/fastdata/mmlaion/hummingbird/hummingbird_dataset_final/datacomp_val_understanding +python megatron/data/streaming_dataset/interleaved_text_image/create_unified_interleaved_dataset.py --path /p/fastdata/mmlaion/datacomp/datacomp_1B/flat --dataset_type datacomp --datacomp_mode understanding --compression zstd --concat_tokens 2048 --tokenizer_type HFTokenizer --vocab_file /p/project/ccstdl/gupta6/multimodal/20B_tokenizer.json --out_root /p/fastdata/mmlaion/hummingbird/hummingbird_dataset_final/datacomp_train_understanding_new 5e8, 22, 26 -python megatron/data/streaming_dataset/interleaved_text_image/create_unified_interleaved_dataset.py --path /p/fastdata/mmlaion/datacomp/datacomp_1B/flat --dataset_type datacomp --datacomp_mode generation --compression zstd --concat_tokens 2048 --tokenizer_type HFTokenizer --vocab_file /p/project/ccstdl/gupta6/multimodal/20B_tokenizer.json --out_root /p/fastdata/mmlaion/hummingbird/hummingbird_dataset_final/datacomp_val_generation +python megatron/data/streaming_dataset/interleaved_text_image/create_unified_interleaved_dataset.py --path /p/fastdata/mmlaion/datacomp/datacomp_1B/flat --dataset_type datacomp --datacomp_mode generation --compression zstd --concat_tokens 2048 --tokenizer_type HFTokenizer --vocab_file /p/project/ccstdl/gupta6/multimodal/20B_tokenizer.json --out_root /p/fastdata/mmlaion/hummingbird/hummingbird_dataset_final/datacomp_train_generation_new 5e8, 30, 18 python megatron/data/streaming_dataset/interleaved_text_image/create_unified_interleaved_dataset.py --path /p/fastdata/mmlaion/OBELICS_parquet --dataset_type obelics --compression zstd --concat_tokens 2048 --tokenizer_type HFTokenizer --vocab_file /p/project/ccstdl/gupta6/multimodal/20B_tokenizer.json --out_root /p/fastdata/mmlaion/hummingbird/hummingbird_dataset_final/obelics_val diff --git a/megatron/model/multimodal_encoder.py b/megatron/model/multimodal_encoder.py index 319b17cd0..413d3c575 100644 --- a/megatron/model/multimodal_encoder.py +++ b/megatron/model/multimodal_encoder.py @@ -12,6 +12,7 @@ "dinov2_large": 1024, "dinov2_small": 384, "openclip": 768, + "evaclip": 1024, } ENCODER_SEQ_LENS = { @@ -19,6 +20,7 @@ "dinov2_large": 257, "dinov2_small": 257, "openclip": 49, + "evaclip": 257, } # MultModal Encoder for Vision and Audio