Skip to content

Commit

Permalink
don't lora over the gate so we can check that it is in fp32
Browse files Browse the repository at this point in the history
  • Loading branch information
winglian committed Jan 12, 2024
1 parent 4f0d078 commit 865329a
Showing 1 changed file with 40 additions and 8 deletions.
48 changes: 40 additions & 8 deletions tests/e2e/test_mixtral.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,15 @@ def test_qlora_w_fa2(self, temp_dir):
"lora_r": 4,
"lora_alpha": 8,
"lora_dropout": 0.1,
"lora_target_linear": True,
"lora_target_modules": [
"o_proj",
"w3",
"k_proj",
"v_proj",
"w1",
"q_proj",
"w2",
],
"val_set_size": 0.1,
"special_tokens": {},
"datasets": [
Expand All @@ -68,7 +76,7 @@ def test_qlora_w_fa2(self, temp_dir):

model, _ = train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
assert (
model.base_model.model.model.layers[0].block_sparse_moe.gate.dtype
model.base_model.model.model.layers[0].block_sparse_moe.gate.type()
== torch.float32
)
assert (Path(temp_dir) / "adapter_model.bin").exists()
Expand All @@ -87,7 +95,15 @@ def test_qlora_wo_fa2(self, temp_dir):
"lora_r": 4,
"lora_alpha": 8,
"lora_dropout": 0.1,
"lora_target_linear": True,
"lora_target_modules": [
"o_proj",
"w3",
"k_proj",
"v_proj",
"w1",
"q_proj",
"w2",
],
"val_set_size": 0.1,
"special_tokens": {},
"datasets": [
Expand All @@ -114,7 +130,7 @@ def test_qlora_wo_fa2(self, temp_dir):

model, _ = train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
assert (
model.base_model.model.model.layers[0].block_sparse_moe.gate.dtype
model.base_model.model.model.layers[0].block_sparse_moe.gate.type()
== torch.float32
)
assert (Path(temp_dir) / "adapter_model.bin").exists()
Expand All @@ -132,7 +148,15 @@ def test_16bit_lora_w_fa2(self, temp_dir):
"lora_r": 4,
"lora_alpha": 8,
"lora_dropout": 0.1,
"lora_target_linear": True,
"lora_target_modules": [
"o_proj",
"w3",
"k_proj",
"v_proj",
"w1",
"q_proj",
"w2",
],
"val_set_size": 0.1,
"special_tokens": {},
"datasets": [
Expand All @@ -159,7 +183,7 @@ def test_16bit_lora_w_fa2(self, temp_dir):

model, _ = train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
assert (
model.base_model.model.model.layers[0].block_sparse_moe.gate.dtype
model.base_model.model.model.layers[0].block_sparse_moe.gate.type()
== torch.float32
)
assert (Path(temp_dir) / "adapter_model.bin").exists()
Expand All @@ -177,7 +201,15 @@ def test_16bit_lora_wo_fa2(self, temp_dir):
"lora_r": 4,
"lora_alpha": 8,
"lora_dropout": 0.1,
"lora_target_linear": True,
"lora_target_modules": [
"o_proj",
"w3",
"k_proj",
"v_proj",
"w1",
"q_proj",
"w2",
],
"val_set_size": 0.1,
"special_tokens": {},
"datasets": [
Expand All @@ -204,7 +236,7 @@ def test_16bit_lora_wo_fa2(self, temp_dir):

model, _ = train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
assert (
model.base_model.model.model.layers[0].block_sparse_moe.gate.dtype
model.base_model.model.model.layers[0].block_sparse_moe.gate.type()
== torch.float32
)
assert (Path(temp_dir) / "adapter_model.bin").exists()
Expand Down

0 comments on commit 865329a

Please sign in to comment.