Commit

Merge pull request #82 from microsoft/speedup_tests
Speedup tests
sordonia authored Aug 11, 2024
2 parents ebaf4e3 + 270ff37 commit e6995ff
Showing 3 changed files with 20 additions and 19 deletions.
mttl/models/containers/__init__.py (4 changes: 2 additions & 2 deletions)
@@ -2,7 +2,7 @@
 from typing import Tuple
 
 from mttl.config import Config
-from mttl.logging import logger
+from mttl.logging import logger, warn_once
 from mttl.models.containers.base import ExpertContainer
 from mttl.models.containers.kv_containers import KVExpertContainer
 from mttl.models.containers.lora_containers import (
@@ -49,7 +49,7 @@ def get_container_class(modifier: str):
         return LoRAExpertContainer
     elif modifier == "skilled_lora":
         if not os.environ.get("COALESCED_LORA_CONTAINER", "False") == "1":
-            logger.warning(
+            warn_once(
                 "COALESCED_LORA_CONTAINER is not set to 1, but still using it for SkilledLoRA"
             )
         return CoalescedLoRAExpertContainer
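
The warn_once helper imported above is not defined in this diff. A minimal sketch of such a deduplicating wrapper (an assumed implementation for illustration, not the actual mttl.logging code) could look like:

    import functools
    import logging

    logger = logging.getLogger("mttl")


    @functools.lru_cache(maxsize=None)
    def warn_once(msg: str) -> None:
        # Each distinct message is cached, so repeated calls (e.g. once per
        # layer or per test) emit a single warning instead of flooding the
        # test output.
        logger.warning(msg)
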
mttl/models/containers/selectors/base.py (2 changes: 1 addition & 1 deletion)
@@ -346,7 +346,7 @@ def add_expert(
         self, expert_name: str, expert_info: ExpertInfo = None, is_default=False
     ):
         if expert_info is None or expert_info.expert_task_name is None:
-            logger.warning(
+            logger.debug(
                 "Expert's task_name not set, assume task name corresponds to expert name!"
             )
             self._task_to_expert_name[expert_name] = expert_name
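
Context for the change above: under Python's standard logging defaults, debug-level records are filtered out unless the logger is configured for extra verbosity, so demoting this message from warning to debug keeps it out of normal test runs. A small illustration, assuming the mttl logger follows standard library logging semantics:

    import logging

    logging.basicConfig(level=logging.WARNING)  # typical default verbosity
    log = logging.getLogger("example")

    log.warning("shown: warnings pass the WARNING threshold")
    log.debug("hidden: debug records are filtered at the default level")

    # Opt back in to the detail only when it is actually needed:
    log.setLevel(logging.DEBUG)
    log.debug("now shown")
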
tests/test_routed_multi_expert_model.py (33 changes: 17 additions & 16 deletions)
@@ -92,6 +92,9 @@ def bigger_dummy_batch():
     return batch
 
 
+bs, max_seq_len = 10, 5
+
+
 class TestMultiExpertModel:
     def create_dummy_expert(self, config: ExpertConfig, exp_name) -> Expert:
         model = MultiExpertModel(model=config.model, device_map="cpu")
@@ -112,7 +115,6 @@ def test_add_expert_with_action_merge(self, tmp_exp_config):
 
         module = MultiExpertModel(**vars(config))
         module.add_experts_from_dict(module_dict, action="merge")
-        bs, max_seq_len = 10, 100
 
         assert isinstance(
             module.model.transformer.h[0].attn.attention.k_proj, LoRAExpertContainer
@@ -132,7 +134,7 @@ def test_add_expert_with_action_merge(self, tmp_exp_config):
 
         # Test Base Llama model
         output = module(batch)
-        assert np.allclose(output.item(), 10.15, atol=0.1)
+        assert np.allclose(output.item(), 15.2, atol=0.1)
 
     def nonzero_B_init(self, model):
         gen = torch.Generator()
@@ -144,8 +146,9 @@ def nonzero_B_init(self, model):
                 mod.lora_a.data = torch.rand(mod.lora_a.shape, generator=gen) * 0.5
                 mod.lora_b.data = torch.rand(mod.lora_b.shape, generator=gen) * 0.5
 
+    @pytest.mark.parametrize("is_coalesced", [(True, False)])
     def test_expert_selector_with_poly_task_routing(
-        self, tmp_exp_config
+        self, tmp_exp_config, is_coalesced
     ): # this fails, why?
         seed_everything(0)
         config: Config = tmp_exp_config
@@ -163,7 +166,6 @@ def test_expert_selector_with_poly_task_routing(
         )
         assert module.hparams.model_modifier == None
         module.add_experts_from_dict(module_dict, action="route")
-        bs, max_seq_len = 10, 100
 
         assert isinstance(
             module.model.transformer.h[0].attn.attention.k_proj, LoRAExpertContainer
@@ -180,16 +182,16 @@ def test_expert_selector_with_poly_task_routing(
         batch["attention_mask"] = attn_mask
         batch["task_names"] = ["task_1", "task_2"] * 5
 
-        is_coalesced = os.environ.get("COALESCED_LORA_CONTAINER", "0") == "1"
+        os.environ["COALESCED_LORA_CONTAINER"] = str(is_coalesced)
 
         # BASE MODEL FWD BASS (because all Bs are == 0, so functially same as backbone)
         output = module(batch)
-        assert np.allclose(output.item(), 10.08 if is_coalesced else 10.20, atol=0.1)
+        assert np.allclose(output.item(), 15.625 if is_coalesced else 10.20, atol=0.1)
 
         # Now let's change the adapter params, and also the function parameterized by the model
         self.nonzero_B_init(module)
         output = module(batch)
-        assert np.allclose(output.item(), 15.03 if is_coalesced else 14.69, atol=0.1)
+        assert np.allclose(output.item(), 18.37 if is_coalesced else 14.69, atol=0.1)
 
         """ Multi-Head Routing Test """
         # NOTE: We need to add SkilledLoRAs instead of standard LoRAs
@@ -214,7 +216,7 @@ def test_expert_selector_with_poly_task_routing(
         output = module(batch)
 
         # Because routing is initialized to uniform, should give same result
-        assert np.allclose(output.item(), 15.03 if is_coalesced else 15.27, atol=0.1)
+        assert np.allclose(output.item(), 19.125 if is_coalesced else 15.27, atol=0.1)
 
         # Now let's change the routing, to make sure the output also changes
         for mod in module.modules():
@@ -223,13 +225,15 @@ def test_expert_selector_with_poly_task_routing(
                 mod.module_logits.data[:, -1] = 999
 
         output = module(batch)
-        assert np.allclose(output.item(), 15.56 if is_coalesced else 16.22, atol=0.1)
+        assert np.allclose(output.item(), 19.875 if is_coalesced else 16.22, atol=0.1)
 
         # Finally, Test invalid tasks
         batch["task_names"][-1] = "task_10"
         with pytest.raises(AssertionError):
             output = module(batch)
 
+        os.environ["COALESCED_LORA_CONTAINER"] = "0"
+
     def test_expert_selector_with_task_name_routing(self, tmp_exp_config):
         seed_everything(0)
         config: Config = tmp_exp_config
@@ -244,8 +248,6 @@ def test_expert_selector_with_task_name_routing(self, tmp_exp_config):
         module.add_experts_from_dict(module_dict, action="route")
         module.set_default_expert("mod3")
 
-        bs, max_seq_len = 10, 100
-
         assert isinstance(
             module.model.transformer.h[0].attn.attention.k_proj, LoRAExpertContainer
         )
@@ -268,7 +270,7 @@ def test_expert_selector_with_task_name_routing(self, tmp_exp_config):
 
         # Test Base Llama model
         output = module(batch)
-        assert np.allclose(output.item(), 10.1, atol=0.1)
+        assert np.allclose(output.item(), 12.3125, atol=0.1)
 
     def test_expert_selector_with_poly_routing(self, tmp_exp_config):
         seed_everything(0)
@@ -290,7 +292,6 @@ def test_expert_selector_with_poly_routing(self, tmp_exp_config):
         # Model has been created. Now, we fix the generator to ensure that coalesced vs not coalesced gives the same as base llama
         generator = torch.Generator()
         generator.manual_seed(0)
-        bs, max_seq_len = 10, 100
         batch = {
             "input_ids": torch.randint(10, 400, (bs, max_seq_len), generator=generator),
             "labels": torch.randint(10, 400, (bs, max_seq_len), generator=generator),
@@ -306,7 +307,7 @@ def test_expert_selector_with_poly_routing(self, tmp_exp_config):
 
         # Test Base Llama model
         output = module(batch)
-        assert np.allclose(output.item(), 10.1, atol=0.1)
+        assert np.allclose(output.item(), 12.3125, atol=0.1)
 
         # check the get_router_weights function
         weights = {}
@@ -345,7 +346,7 @@ def test_expert_selector_with_poly_routing(self, tmp_exp_config):
         assert selector.module_logits_dict["mod2"].item() == 0.0
 
         output = module(batch)
-        assert np.allclose(output.item(), 10.1, atol=0.1)
+        assert np.allclose(output.item(), 12.3125, atol=0.1)
 
         weights = {}
         for _, selector_dict in module.selector_cache.items():
@@ -531,7 +532,7 @@ def test_expert_selector_with_task_predictor_selection(self, tmp_exp_config):
         module = MultiExpertModel(**vars(config))
         module.add_experts_from_dict(module_dict, action="route")
 
-        bs, max_seq_len = 2, 100
+        bs = 2
         batch = {
             "input_ids": torch.randint(bs, 400, (bs, max_seq_len)),
             "labels": torch.randint(bs, 400, (bs, max_seq_len)),
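
Taken together, the test changes replace the per-test bs, max_seq_len = 10, 100 assignments with the module-level bs, max_seq_len = 10, 5 added at the top of the file, so the dummy forward passes run on sequences of length 5 instead of 100 and the hard-coded expected losses shift accordingly. For reference, a stand-alone sketch of the dummy batch these tests build with the new constants (the attention-mask line is an assumption here, since its construction falls outside the visible hunks):

    import torch

    # Module-level constants introduced by this commit: much shorter sequences.
    bs, max_seq_len = 10, 5

    generator = torch.Generator()
    generator.manual_seed(0)

    # Same pattern as the test file: random token ids in [10, 400).
    batch = {
        "input_ids": torch.randint(10, 400, (bs, max_seq_len), generator=generator),
        "labels": torch.randint(10, 400, (bs, max_seq_len), generator=generator),
        # Assumed: a full attention mask matching the shapes above.
        "attention_mask": torch.ones(bs, max_seq_len, dtype=torch.long),
    }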
