Merge pull request #6253 from hiyouga/hiyouga/qwen2vl_mm_proj

[model] support qwen2vl train proj only
hiyouga · Dec 5, 2024 · bcb4fb3
2 parents 561a8e5 + 99c6266
commit bcb4fb3
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 4 deletions.
diff --git a/examples/train_full/qwen2vl_full_sft.yaml b/examples/train_full/qwen2vl_full_sft.yaml
@@ -6,6 +6,7 @@ stage: sft
 do_train: true
 finetuning_type: full
 freeze_vision_tower: true  # choices: [true, false]
+train_mm_proj_only: false  # choices: [true, false]
 deepspeed: examples/deepspeed/ds_z3_config.json  # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]
 
 ### dataset

diff --git a/src/llamafactory/model/model_utils/visual.py b/src/llamafactory/model/model_utils/visual.py
@@ -138,11 +138,10 @@ def get_forbidden_modules(config: "PretrainedConfig", finetuning_args: "Finetuni
             forbidden_modules.add("language_model")
 
     elif model_type == "qwen2_vl":
-        if finetuning_args.freeze_vision_tower:
-            forbidden_modules.add("visual")
-
         if finetuning_args.train_mm_proj_only:
-            raise ValueError("Qwen2-VL models do not support `train_mm_proj_only`.")
+            forbidden_modules.update({"visual.patch_embed", "visual.blocks", "model", "lm_head"})
+        elif finetuning_args.freeze_vision_tower:
+            forbidden_modules.add("visual")
 
     return forbidden_modules