Merge branch 'main' into run_translation
JooZef315 authored Nov 8, 2024
2 parents 388dfc4 + 0af8e3d commit d4c81c8
Showing 3 changed files with 97 additions and 15 deletions.
66 changes: 60 additions & 6 deletions INSTALL.md
@@ -1,24 +1,30 @@
## Installation

### Requirements

- Linux, CUDA >= 11.7, Python >= 3.8, PyTorch >= 2.0.0 (our setup below is based on CUDA 11.8, Python 3.10, PyTorch 2.0.1; more recent versions should work too, but no guarantees)
- Conda (anaconda / miniconda work well)
- Visual Studio (2019) Build Tools (Windows only)

### Setup

#### 1. Create conda environment

```bash
conda create --name ssvp_slt python=3.10
conda create --name ssvp_slt python=3.10 cmake
conda activate ssvp_slt
conda install pytorch==2.0.1 torchvision==0.15.2 pytorch-cuda=11.8 -c pytorch -c nvidia
python -m pip install torch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0 --index-url https://download.pytorch.org/whl/cu118
conda install av -c conda-forge
```

#### 2. (Optional) Video dataloader GPU decoding backend support

If you want to use our video dataloader with the GPU decoding backend, you need to reinstall torchvision, building it from source by following the steps at [https://github.com/pytorch/vision/tree/main/torchvision/csrc/io/decoder/gpu](https://github.com/pytorch/vision/tree/main/torchvision/csrc/io/decoder/gpu). We found that this does not work with ffmpeg 6.1, so we recommend running `conda install 'ffmpeg<6.0'`. If you get a warning that torchvision was built without GPU decoding support because `bsf.h` is missing, we recommend manually downloading `bsf.h` from the ffmpeg source code ([https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/bsf.h](https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/bsf.h), **make sure it matches your ffmpeg version!**) and placing it under `$(path-to-your-conda)/ssvp_slt/include/libavcodec`.
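
A quick way to check whether the rebuild picked up GPU decoding is to open a short clip on a CUDA device. This is only a minimal sketch: the `device` argument of `VideoReader` and the `sample.mp4` path are assumptions that depend on your torchvision version and data, so adapt as needed.

```python
# Sanity check for the GPU decoding backend (sketch; the exact VideoReader
# API, including the `device` argument, may differ across torchvision versions).
import torchvision
from torchvision.io import VideoReader

print("torchvision:", torchvision.__version__)

try:
    # "sample.mp4" is a placeholder -- point this at any short video you have.
    reader = VideoReader("sample.mp4", device="cuda")
    frame = next(iter(reader))
    print("GPU decoding OK, first frame shape:", frame["data"].shape)
except Exception as exc:
    # Lands here if torchvision was built without GPU decoding support
    # (or if this torchvision version exposes a different API).
    print("GPU decoding not available:", exc)
```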

#### 3. Pip Installs

Install the remaining dependencies and an editable install of our ssvp-slt package:

```bash
pip install git+https://github.com/facebookresearch/stopes.git
pip install -r requirements.txt
@@ -32,17 +38,65 @@ cd ..
pip install -e .
```
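
After the editable install, a quick import check confirms that the packages resolve from the environment. This is a minimal sketch; it assumes only the package names used above (`ssvp_slt` and `stopes`).

```python
# Quick import check for the editable ssvp-slt install and the stopes dependency.
import ssvp_slt
import stopes

print("ssvp_slt and stopes imported successfully")
```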

#### 4. BLEURT
#### 4. Install dlib

- Install CUDA and cuDNN with

```bash
conda install cuda cudnn -c nvidia
```

- Install dlib from source (adjust the Visual Studio generator version in the `cmake` command to match your installation)

```bash
git clone https://github.com/davisking/dlib.git
cd dlib
mkdir build
cd build
cmake .. -DDLIB_USE_CUDA=1 -DUSE_AVX_INSTRUCTIONS=1 -DCUDAToolkit_ROOT=/path/to/your/conda/envs/dlib/bin/ -G "Visual Studio 17 2022" -A x64 --verbose
cmake --build .
cd ..
python setup.py install --set DLIB_USE_CUDA=1
```
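
To confirm the build picked up CUDA, you can run the same check that `tests/test_env.py` performs in step 8 below:

```python
# Verify that dlib was compiled with CUDA support.
import dlib

print("dlib CUDA enabled:", dlib.DLIB_USE_CUDA)
if dlib.DLIB_USE_CUDA:
    print("dlib CUDA devices:", dlib.cuda.get_num_devices())
```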

#### 5. BLEURT

If you want to compute BLEURT scores as part of the translation evals (via `common.compute_bleurt=true`), you need to install BLEURT:

```bash
pip install git+https://github.com/google-research/bleurt.git
```
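
As a quick smoke test of the BLEURT install, you can score a toy sentence pair. This is a minimal sketch; the checkpoint directory (`BLEURT-20` here) is an assumption, so download whichever BLEURT checkpoint you plan to use and point the scorer at it.

```python
# Smoke test for the BLEURT install (sketch; assumes a BLEURT checkpoint
# such as BLEURT-20 has been downloaded and unpacked locally).
from bleurt import score

checkpoint = "BLEURT-20"  # placeholder path to an unpacked BLEURT checkpoint
scorer = score.BleurtScorer(checkpoint)
scores = scorer.score(
    references=["the cat sat on the mat"],
    candidates=["a cat was sitting on the mat"],
)
print("BLEURT:", scores)
```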

You will also likely need to set some environment variables before running the translation code:
#### 6. Set environment variables

You will also likely need to set some environment variables before running the translation code:

```bash
export XLA_FLAGS=--xla_gpu_cuda_data_dir=$(path-to-your-cuda-11.8)
export LD_LIBRARY_PATH=$(path-to-your-cuda-11.8)/lib64:${LD_LIBRARY_PATH}
```
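
A small check (nothing beyond the standard library assumed) that the variables are actually visible to the Python process running the translation code:

```python
# Confirm the CUDA-related environment variables are set in this environment.
import os

for var in ("XLA_FLAGS", "LD_LIBRARY_PATH"):
    print(f"{var} = {os.environ.get(var, '<not set>')}")
```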

#### 5. Weights and Biases
If you want to use [Weights and Biases](https://wandb.ai) to track your training runs (via `cfg.wandb.enabled=true`), you need to ensure your `WANDB_API_KEY` environment variable is set correctly.
#### 7. Weights and Biases

If you want to use [Weights and Biases](https://wandb.ai) to track your training runs (via `cfg.wandb.enabled=true`), you need to ensure your `WANDB_API_KEY` environment variable is set correctly.
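
A minimal sketch to verify the key is picked up before launching a run (assumes the `wandb` client is installed in the environment; `wandb.login()` reads `WANDB_API_KEY` when it is set):

```python
# Check that the Weights and Biases API key is set and accepted.
import os
import wandb

if "WANDB_API_KEY" not in os.environ:
    print("WANDB_API_KEY is not set")
else:
    print("wandb login OK:", wandb.login())
```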

#### 8. Test your env

You can run the `test_env.py` Python script to check whether the dependencies are installed correctly:

```bash
python tests/test_env.py
```

The output should look like the following (exact values may differ on your system):

```bash
CUDA is available!
2.2.0+cu118
11.8
Device count: 1
Current device: 0
Device name: NVIDIA A100-SXM4-40GB
dlib CUDA is available!
Number of dlib CUDA devices: 1
```
17 changes: 8 additions & 9 deletions src/ssvp_slt/modeling/sign_hiera.py
@@ -600,28 +600,27 @@ def from_clip_model(cls, model_id: str, clip_model_path: str) -> nn.Module:
        from ssvp_slt.modeling.clip import CLIP, CLIPTextCfg, CLIPVisionCfg

        checkpoint = torch.load(clip_model_path)
        args = checkpoint["args"]
        clip_model_cfg = checkpoint["cfg"]["model"]
        model_params = checkpoint["clip"]

        vision_cfg = CLIPVisionCfg(
            model_id=args.model,
            proj="mlp",
            model_id=clip_model_cfg["vision_model_name"],
            proj=clip_model_cfg["vision_model_proj"],
        )

        # FIXME: might cause errors if proj and pooler are not `mlp` and `mean_pooler`
        text_cfg = CLIPTextCfg(
            hf_model_name=args.text_model_name_or_path,
            proj="mlp",
            pooler_type="mean_pooler",
            hf_model_name=clip_model_cfg["text_model_name_or_path"],
            proj=clip_model_cfg["text_model_proj"],
            pooler_type=clip_model_cfg["text_model_pooler"],
        )
        clip = CLIP(embed_dim=768, vision_cfg=vision_cfg, text_cfg=text_cfg, output_dict=True)
        clip = CLIP(embed_dim=clip_model_cfg.pop("embed_dim"), vision_cfg=vision_cfg, text_cfg=text_cfg, output_dict=True)

        print(f"Loading CLIP weights from {clip_model_path}")
        msg = clip.load_state_dict(model_params)
        print(msg)

        print("Loading SignHiera weights from CLIP vision tower")
        model = sys.modules[__name__].__dict__[model_id](**args.__dict__)
        model = sys.modules[__name__].__dict__[model_id](**clip_model_cfg)
        msg = model.load_state_dict(clip.visual.transformer.state_dict(), strict=False)
        print(msg)

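The updated `from_clip_model` reads its CLIP configuration from the checkpoint itself (`checkpoint["cfg"]["model"]`) instead of `args`. Below is a hedged sketch for inspecting such a checkpoint; only the key names come from the code above, while the file path and everything else are assumptions.

```python
# Inspect a CLIP checkpoint the way from_clip_model expects it (sketch;
# key names are taken from the code above, the path is a placeholder).
import torch

checkpoint = torch.load("path/to/clip_checkpoint.pt", map_location="cpu")
clip_model_cfg = checkpoint["cfg"]["model"]  # model config dict
model_params = checkpoint["clip"]            # CLIP state dict

for key in (
    "embed_dim",
    "vision_model_name",
    "vision_model_proj",
    "text_model_name_or_path",
    "text_model_proj",
    "text_model_pooler",
):
    print(key, "->", clip_model_cfg[key])
print("state dict tensors:", len(model_params))
```
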
29 changes: 29 additions & 0 deletions tests/test_env.py
@@ -0,0 +1,29 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import torch
import dlib

# Check if CUDA is available and if it's using a GPU
if torch.cuda.is_available():
    print("CUDA is available!")
    print(torch.__version__)
    print(torch.version.cuda)
    print(f"Device count: {torch.cuda.device_count()}")
    print(f"Current device: {torch.cuda.current_device()}")
    print(f"Device name: {torch.cuda.get_device_name(0)}")

else:
    print("CUDA is not available.")
    print("Consider using a runtime with GPU acceleration in Colab.")


# If dlib CUDA is available, print details
if dlib.DLIB_USE_CUDA:
    print("dlib CUDA is available!")
    print(f"Number of dlib CUDA devices: {dlib.cuda.get_num_devices()}")
else:
    print("dlib CUDA is not available.")
