Skip to content

Commit

Permalink
don't lora over the gate so we can check that it is in fp32
Browse files Browse the repository at this point in the history
  • Loading branch information
winglian committed Jan 12, 2024
1 parent 4f0d078 commit 865329a
Showing 1 changed file with 40 additions and 8 deletions.
48 changes: 40 additions & 8 deletions tests/e2e/test_mixtral.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,15 @@ def test_qlora_w_fa2(self, temp_dir):
"lora_r": 4,
"lora_alpha": 8,
"lora_dropout": 0.1,
"lora_target_linear": True,
"lora_target_modules": [
"o_proj",
"w3",
"k_proj",
"v_proj",
"w1",
"q_proj",
"w2",
],
"val_set_size": 0.1,
"special_tokens": {},
"datasets": [
Expand All @@ -68,7 +76,7 @@ def test_qlora_w_fa2(self, temp_dir):

model, _ = train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
assert (
model.base_model.model.model.layers[0].block_sparse_moe.gate.dtype
model.base_model.model.model.layers[0].block_sparse_moe.gate.type()
== torch.float32
)
assert (Path(temp_dir) / "adapter_model.bin").exists()
Expand All @@ -87,7 +95,15 @@ def test_qlora_wo_fa2(self, temp_dir):
"lora_r": 4,
"lora_alpha": 8,
"lora_dropout": 0.1,
"lora_target_linear": True,
"lora_target_modules": [
"o_proj",
"w3",
"k_proj",
"v_proj",
"w1",
"q_proj",
"w2",
],
"val_set_size": 0.1,
"special_tokens": {},
"datasets": [
Expand All @@ -114,7 +130,7 @@ def test_qlora_wo_fa2(self, temp_dir):

model, _ = train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
assert (
model.base_model.model.model.layers[0].block_sparse_moe.gate.dtype
model.base_model.model.model.layers[0].block_sparse_moe.gate.type()
== torch.float32
)
assert (Path(temp_dir) / "adapter_model.bin").exists()
Expand All @@ -132,7 +148,15 @@ def test_16bit_lora_w_fa2(self, temp_dir):
"lora_r": 4,
"lora_alpha": 8,
"lora_dropout": 0.1,
"lora_target_linear": True,
"lora_target_modules": [
"o_proj",
"w3",
"k_proj",
"v_proj",
"w1",
"q_proj",
"w2",
],
"val_set_size": 0.1,
"special_tokens": {},
"datasets": [
Expand All @@ -159,7 +183,7 @@ def test_16bit_lora_w_fa2(self, temp_dir):

model, _ = train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
assert (
model.base_model.model.model.layers[0].block_sparse_moe.gate.dtype
model.base_model.model.model.layers[0].block_sparse_moe.gate.type()
== torch.float32
)
assert (Path(temp_dir) / "adapter_model.bin").exists()
Expand All @@ -177,7 +201,15 @@ def test_16bit_lora_wo_fa2(self, temp_dir):
"lora_r": 4,
"lora_alpha": 8,
"lora_dropout": 0.1,
"lora_target_linear": True,
"lora_target_modules": [
"o_proj",
"w3",
"k_proj",
"v_proj",
"w1",
"q_proj",
"w2",
],
"val_set_size": 0.1,
"special_tokens": {},
"datasets": [
Expand All @@ -204,7 +236,7 @@ def test_16bit_lora_wo_fa2(self, temp_dir):

model, _ = train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
assert (
model.base_model.model.model.layers[0].block_sparse_moe.gate.dtype
model.base_model.model.model.layers[0].block_sparse_moe.gate.type()
== torch.float32
)
assert (Path(temp_dir) / "adapter_model.bin").exists()
Expand Down

0 comments on commit 865329a

Please sign in to comment.