"""Smoke test for the ViT-B/16 visual encoder loaded through open_clip.

Loads the timm `vit_base_patch16_224.augreg_in21k` checkpoint, runs the
bundled preprocessing on a local image (``dog.jpg``), and prints the shapes
produced by the visual trunk. Intended to be run as a script.
"""

import torch
from PIL import Image
import open_clip


class Args:
    """Empty argument container, kept for interface compatibility with
    sibling tests that expect an ``Args`` class to exist."""

    def __init__(self):
        pass


def test_eva_clip_image_transformer_shapes():
    """Load the ViT-B/16 encoder, encode ``dog.jpg``, and print tensor shapes.

    Side effects: downloads the checkpoint from the HuggingFace hub on first
    run, reads ``dog.jpg`` from the working directory, and prints to stdout.
    """
    # Fall back to CPU so the test also runs on machines without a GPU;
    # behavior on CUDA machines is unchanged.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    model, preprocess = open_clip.create_model_from_pretrained(
        'ViT-B-16', 'hf-hub:timm/vit_base_patch16_224.augreg_in21k'
    )

    # Show the preprocessing pipeline for debugging.
    for transform in preprocess.transforms:
        print(transform)

    visual = model.visual
    # Ask the timm trunk to also expose patch tokens, not just the pooled
    # embedding. NOTE(review): with output_tokens=True the trunk may return a
    # (pooled, tokens) tuple — confirm against the installed timm version.
    visual.trunk.output_tokens = True

    visual = visual.to(device)
    del model  # keep only the visual trunk; free the text tower's memory

    image_path = "dog.jpg"
    image = preprocess(Image.open(image_path)).unsqueeze(0).to(device)
    print(image.shape)

    pooled = visual(image)
    print(pooled.shape)


def main():
    """Script entry point: run the shape smoke test."""
    test_eva_clip_image_transformer_shapes()


if __name__ == "__main__":
    main()